def main():
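    # Read ca_dlink_filelist.csv, download every listed firmware, then
    # rewrite the list with size/sha1/md5 columns appended.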
    with open('ca_dlink_filelist.csv', 'r') as fin:
        cr = csv.reader(fin)
        next(cr)  # skip the header row
        rows = [[model, rev, ver, url, date]
                for model, rev, ver, url, date in cr]
    for index, row in enumerate(rows):
        model, rev, fw_ver, ftp_url, date = row
        try:
            fname = download_file(ftp_url)
            sha1 = getFileSha1(fname)
            md5 = getFileMd5(fname)
            fsize = os.path.getsize(fname)
            rows[index] = [model, rev, fw_ver, ftp_url, date, fsize, sha1, md5]
        except urllib.error.URLError:
            print('Failed:', ftp_url)
            rows[index] = [model, rev, fw_ver, ftp_url, date, -1, '', '']
        except ValueError:
            print('Failed: wrong url "%s"' % ftp_url)
            rows[index] = [model, rev, fw_ver, ftp_url, date, -1, '', '']
    with open('ca_dlink_filelist2.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow([
            'model', 'rev', 'fw_ver', 'ftp_url', 'date', 'size', 'sha1', 'md5'
        ])
        cw.writerows(rows)
def download_ftp_file(model, fver, fdate, furl):
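    # Fetch a single file from an FTP url with ftputil, hash it, and
    # append the result to zyxel_us_filelist.csv.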
    try:
        host = ftputil.FTPHost(
            urlsplit(furl).hostname, "anonymous", "*****@*****.**")
    except ftputil.error.FTPOSError as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        return
    try:
        host.keep_alive()
        if not fdate:
            fdate = host.path.getmtime(urlsplit(furl).path)
        fsize = host.path.getsize(urlsplit(furl).path)
        needDownload, fname = determine_ftp_filename(host, furl)
        if needDownload:
            print("Start download %s -> \"%s\" %d bytes" %
                  (furl, fname, fsize))
            host.download(urlsplit(furl).path, dlDir + fname)
            print("Finished download \"%s\" -> %s %d bytes" %
                  (furl, fname, fsize))
        else:
            print('Already downloaded %s' % (furl))
        md5 = getFileMd5(dlDir + fname)
        sha1 = getFileSha1(dlDir + fname)
        fsize = os.path.getsize(dlDir + fname)
        with open('zyxel_us_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, fver, fname, furl, fdate, fsize, sha1, md5])
    except TimeoutError as ex:
        print('Timeout Error ex=%s, furl=%s' % (ex, furl))
    except BaseException as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        traceback.print_exc()
    finally:
        host.close()
def main():
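    # Walk TFiles in arris.sqlite3 starting at startIdx: download rows
    # that have a url but no sha1 yet, store sha1/size, then mirror the
    # file to the GRID ftp host and remove the local copy.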
    global startTrail, prevTrail, conn
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
        conn = sqlite3.connect('arris.sqlite3')
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,file_url,file_sha1 FROM TFiles ORDER BY id "
            "LIMIT -1 OFFSET %d" % startIdx
            ).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId, file_url, file_sha1 = row
            if not file_url:
                continue
            if file_sha1:
                continue
            uprint('idx=%d' % idx)
            try:
                local_file = downloadFile(file_url, "Content-Disposition")
            except TypeError:
                continue
            file_sha1 = getFileSha1(local_file)
            file_size = path.getsize(local_file)
            csr.execute(
                "UPDATE TFiles SET file_sha1=:file_sha1,file_size=:file_size"
                " WHERE id = :devId", locals())
            conn.commit()
            ftp = ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword)
            uprint('upload to GRID')
            ftp.upload(local_file, path.basename(local_file))
            ftp.close()
            os.remove(local_file)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def epilog():
    # relies on fname, model, fw_ver, fw_url, rel_date and fsize, which
    # were presumably defined in the surrounding scope of the original script
    sha1 = getFileSha1(localstor + fname)
    md5 = getFileMd5(localstor + fname)
    with open('jp_dlink_filelist.csv', 'a') as fout:
        cw = csv.writer(fout)
        cw.writerow(
            [model, fw_ver, fw_url, rel_date, fsize, sha1, md5])
def downloadFile(ftp, model, rfile):
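    # Download rfile from the ZyXEL ftp, upsert its metadata into the
    # TFiles table, upload the file to the GRID ftp, then delete it.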
    global prevTrail, conn
    csr = conn.cursor()
    try:
        fname = ftp.path.basename(rfile)
        epoch = ftp.path.getmtime(rfile)
        fwDate = datetime.fromtimestamp(epoch)
        fileSize = ftp.path.getsize(rfile)
        lfile = path.join(dlDir, fname)
        uprint('download "%s"' % fname)
        ftp.download(rfile, lfile)

        fileSha1 = getFileSha1(lfile)
        fileUrl = "ftp://" + zyxel_ftp + "/" + rfile
        modelName = model.replace('_', ' ')

        trailStr = str(prevTrail)
        csr.execute("INSERT OR REPLACE INTO TFiles (model,"
            "fw_date,file_size,file_sha1,file_url,tree_trail) VALUES "
            "(:modelName,:fwDate,:fileSize,:fileSha1,:fileUrl,:trailStr)",
            locals())
        conn.commit()
        uprint('UPSERT fileSha1=%(fileSha1)s, fileSize=%(fileSize)s'
               ' model="%(modelName)s", trail=%(trailStr)s' % locals())
        with ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword) as grid:
            grid.upload(lfile, path.basename(lfile))
            uprint('uploaded "%s" to ftp://%s/'
                   % (path.basename(lfile), ftpHostName))
        os.remove(lfile)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def download(ftpurl):
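    # Mirror a single file from ftp://files.dlink.com.au (skipping
    # thumbnails and docs) and log it to au_dlink_ftp_filelist.csv.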
    try:
        fname = ftpurl.split('/')[-1]
        if fname.lower() in ['thumbs.db']:
            return
        if fname.split('.')[-1].lower() in ['pdf', 'txt']:
            return
        with ftputil.FTPHost('files.dlink.com.au', 'anonymous', '') as host:
            fsize = host.path.getsize(ftpurl)
            fdate = host.path.getmtime(ftpurl)
            if os.path.isfile(localstor + fname) \
                    and os.path.getsize(localstor + fname) == fsize:
                print('%(fname)s already exists' % locals())
                return
            print('Start downloading %(ftpurl)s' % locals())
            host.download(ftpurl, localstor + fname)
            print('Finished downloading %(ftpurl)s' % locals())
            file_sha1 = getFileSha1(localstor + fname)
            file_md5 = getFileMd5(localstor + fname)
            with open('au_dlink_ftp_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([ftpurl, fsize, fdate, file_sha1, file_md5])
    except Exception as ex:
        print(ex)
def download_file(model, fdesc, furl):  # noqa
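    # Stream furl to disk unless a complete copy already exists, then
    # append the file metadata to netgear_cn_filelist.csv.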
    try:
        with closing(requests.get(url=furl, timeout=30, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            else:
                fsize = None
            if not fsize:
                print('Unknown size resp.url=%s, headers=%s' %
                      (resp.url, resp.headers))
                with open('netgear_cn_filelist.csv', 'a') as fout:
                    cw = csv.writer(fout)
                    cw.writerow([
                        model, "", "", furl, None, fsize, "unknown", "unknown"
                    ])
                return
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            else:
                fdate = None
            try:
                fver = re.search(r'\d+(\.\d+)+', fdesc).group(0)
            except AttributeError:
                fver = ''
            needDownload, fname = determine_filename(resp)
            if not needDownload:
                print('Already downloaded: ', fname)
            else:
                print('Start downloading (%d bytes): %s' % (fsize, furl))
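                # reserve the final name with a tiny placeholder, stream the
                # body into a ".downloading" temp file, then swap it into
                # place with os.replace once the transfer completes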
                with open(dlDir + fname, 'wb') as fout:
                    fout.write(b'place_holder0')
                with open(dlDir + fname + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                try:
                    os.replace(dlDir + fname + '.downloading', dlDir + fname)
                except FileNotFoundError as ex:
                    print(ex)
                    print('"%s" not found' % (dlDir + fname + '.downloading'))
                print('Finished downloading: %s' % furl)
            sha1 = getFileSha1(dlDir + fname)
            md5 = getFileMd5(dlDir + fname)
            if fsize and os.path.getsize(dlDir + fname) != fsize:
                print('Content-Length(%s) different to real fsize %s' %
                      (fsize, os.path.getsize(dlDir + fname)))
            fsize = os.path.getsize(dlDir + fname)
            with open('netgear_cn_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow(
                    [model, fver, fname, furl, fdate, fsize, sha1, md5])
    except requests.exceptions.ConnectionError:
        print('ConnectionError: %s' % furl)
    except requests.exceptions.ReadTimeout:
        print('ReadTimeout: %s' % furl)
    except BaseException as ex:
        traceback.print_exc()
def main():
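    # Same crawl loop as the arris script above, against
    # huawei_consumer_search_by_keyword.sqlite3; additionally guesses
    # model and fw_ver from the file name and skips Android/Ascend/
    # Honor/open-source packages.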
    global startTrail, prevTrail, conn
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
        conn = sqlite3.connect('huawei_consumer_search_by_keyword.sqlite3')
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,file_name,file_url,file_sha1,file_size FROM TFiles"
            " ORDER BY id LIMIT -1 OFFSET %d" % startIdx
            ).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId, file_name, file_url, file_sha1, file_size = row
            if not file_url:
                continue
            if file_sha1:
                continue
            if 'Android' in file_name:
                continue
            if 'Ascend' in file_name:
                continue
            if 'Honor' in file_name:
                continue
            if 'Open Source' in file_name:
                continue
            if 'opensource' in file_name.lower():
                continue

            uprint('idx=%d, file_name="%s", file_size=%d'
                   % (idx, file_name, file_size))
            model = guessModel(file_name)
            uprint('model="%s"' % model)
            if not model:
                continue
            fw_ver = guessVersion(file_name)
            uprint('fw_ver="%s"' % fw_ver)
            if not fw_ver:
                continue
            try:
                local_file = downloadFile(file_url, "Content-Disposition")
            except TypeError:
                continue
            file_sha1 = getFileSha1(local_file)
            file_size = path.getsize(local_file)
            csr.execute(
                "UPDATE TFiles SET file_sha1=:file_sha1"
                ",file_size=:file_size"
                ",model=:model "
                ",fw_ver=:fw_ver "
                " WHERE id = :devId", locals())
            conn.commit()
            ftp = ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword)
            uprint('upload to GRID')
            ftp.upload(local_file, path.basename(local_file))
            ftp.close()
            os.remove(local_file)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def download_file(model, fver, text, furl):  #noqa
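    # Tenda variant of the streaming downloader: name collisions with a
    # different size get a _1, _2, ... suffix instead of overwriting.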
    try:
        with closing(requests.get(url=furl, timeout=30, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            else:
                fsize = None
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            else:
                fdate = None
            fname = os.path.basename(urlsplit(furl).path)
            alreadyDownloaded = False
            if os.path.exists(dlDir + fname) \
                    and os.path.getsize(dlDir + fname) == fsize:
                alreadyDownloaded = True
            elif os.path.exists(
                    dlDir + fname) and os.path.getsize(dlDir + fname) != fsize:
                # keep renaming (file_1, file_2, ...) until the name is free
                while os.path.exists(dlDir + fname):
                    ftitle, fext = os.path.splitext(fname)
                    m = re.search(r'(.+)_(\d+)', ftitle)
                    if m:
                        ftitle = m.group(1) + '_' + str(int(m.group(2)) + 1)
                        fname = ftitle + fext
                    else:
                        fname = ftitle + "_1" + fext

            if not alreadyDownloaded:
                print('Start downloading %s -> "%s" %s bytes' %
                      (furl, fname, fsize))
                with open(dlDir + fname, 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                print('Finished downloading %s -> "%s" %s bytes' %
                      (furl, fname, fsize))
            else:
                print('Already downloaded %s' % furl)
            md5 = getFileMd5(dlDir + fname)
            sha1 = getFileSha1(dlDir + fname)
            fsize = os.path.getsize(dlDir + fname)
            with open('tenda_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow(
                    [model, fver, fname, furl, fdate, fsize, sha1, md5])
    except TimeoutError as ex:
        print('TimeoutError ex=%s, furl=%s' % (ex, furl))
    except BaseException as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        traceback.print_exc()
def epilog(fsize, fdate):
    if not os.path.isfile(localstor + fname):
        sha1 = None
        md5 = None
    else:
        sha1 = getFileSha1(localstor + fname)
        md5 = getFileMd5(localstor + fname)
    with open('us_dlink_filelist.csv', 'a') as fout:
        cw = csv.writer(fout)
        cw.writerow(
            [model, rev, fw_ver, fw_url, fsize, fdate, sha1, md5])
    return
def download(session, url, model, filename, fw_ver, fdate):
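    # Fetch one firmware from tsd.dlink.com.tw; the real file name and
    # size may come from the redirect query string or response headers.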
    from urllib import parse
    fname = filename
    try:
        doccont = session.get(
            url=url,
            headers={'Referer':'http://tsd.dlink.com.tw/downloads2008detailgo.asp',
                     'Upgrade-Insecure-Requests':'1'}, stream=True, timeout=30)
        fw_url = doccont.url
        # print('fw_url=', fw_url)
        docParams = parse.parse_qs(parse.urlsplit(doccont.url).query)
        # print('docParams=', docParams)
        if 'fileName' in docParams:
            fname = docParams['fileName'][0]
        else:
            fname = os.path.basename(parse.urlsplit(fw_url).path)
        if 'fileSize' in docParams:
            fsize = int(float(docParams['fileSize'][0]))
            # print('fsize=', fsize)
        if 'Content-Length' in doccont.headers:
            fsize = int(doccont.headers['Content-Length'])
            # print('fsize=Content-Length=', fsize)
        if 'Content-Disposition' in doccont.headers:
            fname = doccont.headers['Content-Disposition'].split(';', 1)[1].split('=', 1)[1]
        if 'fsize' in locals():
            if os.path.isfile(localstor + fname) \
                    and os.path.getsize(localstor + fname) == fsize:
                # print('"%s" already exists'%(localstor+fname))
                return
        print('Start Downloading "%s" to "%s"' % (doccont.url, localstor+fname))
        with open(localstor + fname, 'wb') as fout:
            for chunk in doccont.iter_content(4096):
                fout.write(chunk)
        fsize = os.path.getsize(localstor + fname)
        print('Finished Downloading "%s" to "%s", fsize=%d'
              % (doccont.url, localstor + fname, fsize))
        sha1 = getFileSha1(localstor + fname)
        md5 = getFileMd5(localstor + fname)
        with open('tsd_dlink_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, '', fw_ver, fw_url, fdate, fsize, sha1, md5])
    except socket.timeout:
        print('socket timeout error, url=', url)
        return
    except requests.exceptions.Timeout as ex:
        print('requests timeout error, url=', url)
        return
    except BaseException as ex:
        print('unknown error, url=', url)
        traceback.print_exc()
        print(ex)
        return
def download_file(model, desc, fw_url):
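    # Stream fw_url into dlDir via a ".downloading" temp name, verify
    # the size, and append the result to netgear_filelist.csv.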
    try:
        with closing(requests.get(url=fw_url, timeout=10, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fileSize = int(resp.headers['Content-Length'])
                print('fileSize=', fileSize)
            else:
                fileSize = None
            try:
                fw_ver = re.search(r'\d+(\.\d+)+', desc).group(0)
            except AttributeError:
                fw_ver = ''
            fileName = os.path.basename(urlsplit(fw_url).path)
            print('fileName=', fileName)
            if not fileName:
                print('No fileName:, url=', fw_url)
                return
            if 'Last-Modified' in resp.headers:
                fw_date = resp.headers['Last-Modified']
                fw_date = parse_date(fw_date)
            else:
                fw_date = None
            if os.path.isfile(dlDir + fileName) \
                    and fileSize == os.path.getsize(dlDir + fileName):
                print('already downloaded: ', fileName)
            else:
                print('start downloading: ', fw_url)
                with open(dlDir + fileName + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                try:
                    os.rename(dlDir + fileName + '.downloading', dlDir + fileName)
                except FileNotFoundError:
                    print('"%s" not found' % (dlDir + fileName + '.downloading'))
                print('finished downloading: ', fw_url)
            sha1 = getFileSha1(dlDir + fileName)
            md5 = getFileMd5(dlDir + fileName)
            if fileSize and os.path.getsize(dlDir + fileName) != fileSize:
                print('Content-Length(%s) different to real fileSize %s'
                      % (fileSize, os.path.getsize(dlDir + fileName)))
            fileSize = os.path.getsize(dlDir + fileName)
            with open('netgear_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, fw_ver, fileName, fw_url, fw_date, fileSize, sha1, md5])
    except BaseException as ex:
        traceback.print_exc()
        import pdb
        pdb.set_trace()
def main():
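    # Resume the Belkin crawl at startIdx: download rows missing a
    # sha1, update belkin.sqlite3, and mirror each file to the GRID ftp.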
    startIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
    global conn
    with sqlite3.connect('belkin.sqlite3') as conn:
        csr = conn.cursor()
        rows = csr.execute("SELECT id,download_url,page_url,file_sha1"
                " FROM TFiles LIMIT -1 OFFSET %d" % startIdx).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId, url, page_url, fileSha1 = row
            if fileSha1 or not url:
                continue
            uprint('idx=%s, url=%s' % (idx, url))
            url = safeUrl(url)
            uprint('url=' + url)
            fname = urlFileName(url)
            uprint('download "%s"' % fname)
            fname = path.join(dlDir, fname)
            try:
                downloadFile(url, fname)
            except urllib.error.HTTPError as ex:
                print(ex)
                continue
            except OSError as ex:
                if ex.errno == 28:
                    print(ex)
                    print('[Errno 28] No space left on device')
                    break
                raise
            except Exception as ex:
                ipdb.set_trace()
                traceback.print_exc()
                continue

            fileSha1 = getFileSha1(fname)
            fileSize = path.getsize(fname)
            print('sha1="%s" for "%s"' % (fileSha1, fname))
            csr.execute("UPDATE TFiles SET file_sha1=:fileSha1,"
                    " file_size=:fileSize WHERE id=:devId", locals())
            conn.commit()
            uprint('UPDATE fileSha1=%(fileSha1)s, fileSize=%(fileSize)s'
                   ' WHERE id="%(devId)s"' % locals())
            with ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword) as ftp:
                ftp.upload(fname, path.basename(fname))
                uprint('uploaded "%s" to ftp://%s/'
                       % (path.basename(fname), ftpHostName))
            os.remove(fname)
def upsert_psql(file_name, fw_url, model, version, rel_date):
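    # Insert one firmware image row, with freshly computed
    # size/md5/sha1, into the postgres "image" table.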
    conn = None
    try:
        conn = psycopg2.connect(database="firmware", user="******",
                                password="******", host="127.0.0.1")
        cur = conn.cursor()
        brand_id = 1
        file_sha1 = getFileSha1(file_name)
        file_md5 = getFileMd5(file_name)
        file_size = os.path.getsize(file_name)
        cur.execute("INSERT INTO image \
                    (filename, brand, model, version, rel_date, brand_id, \
                    file_size, hash, file_sha1, file_url) VALUES \
                    (      %s,    %s,    %s,      %s,       %s,       %s, \
                           %s,   %s,        %s,       %s)",
                    (file_name, 'Avm', model, version, rel_date, brand_id,
                     file_size, file_md5, file_sha1, fw_url))
        conn.commit()
    finally:
        if conn:
            conn.close()
def main():
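    # Join the tab-separated ftp.dlink.eu listing with local hashes and
    # write the combined table out as CSV.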
    with open('dlink_ftp.dlink.eu_filelist.csv', 'w') as fout:
        cw = csv.writer(fout, dialect='excel')
        cw.writerow([
            'ftp_url', 'file_size', 'file_date', 'model', 'file_sha1',
            'file_md5'
        ])
        with open('dlink_ftp.dlink.eu_filelist.txt', 'r') as fin:
            for line in fin:
                line = line.strip()
                if not line:
                    continue
                ftpurl, fsize, fdate = line.split('\t', 2)
                fdate = datetime.fromtimestamp(float(fdate))
                fname = 'output/D-Link/ftp.dlink.eu/' + ftpurl.split('/')[-1]
                sha1 = getFileSha1(fname)
                md5 = getFileMd5(fname)
                fsize = path.getsize(fname)
                model = get_model_from_ftp_url(ftpurl)
                cw.writerow([ftpurl, fsize, fdate, model, sha1, md5])
                print('%s,%s,%s,%s' % (ftpurl, fsize, fdate, model))
def download_file(hw_rev, model, fver, fdate, furl):  #noqa
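    # Linksys variant: prefer the name from Content-Disposition and the
    # size from Content-Length, falling back to the url path.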
    try:
        with closing(requests.get(url=furl, timeout=10, stream=True)) as resp:
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            if 'Content-Disposition' in resp.headers:
                fname = resp.headers['Content-Disposition']
                fname = fname.split(';')[-1].split('=')[-1].strip()
            else:
                fname = os.path.basename(urlsplit(furl).path)
            fsize = None
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            if os.path.isfile(dlDir + fname) \
                    and fsize == os.path.getsize(dlDir + fname):
                print('already downloaded: ', fname)
            else:
                print('start downloading: ', furl)
                with open(dlDir + fname + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                try:
                    os.rename(dlDir + fname + '.downloading', dlDir + fname)
                except FileNotFoundError:
                    print('"%s" not found' % (dlDir + fname + '.downloading'))
                print('finished downloading: ', furl)
            sha1 = getFileSha1(dlDir + fname)
            md5 = getFileMd5(dlDir + fname)
            if fsize and os.path.getsize(dlDir + fname) != fsize:
                print('Content-Length(%s) different to real fsize %s' %
                      (fsize, os.path.getsize(dlDir+fname)))
            fsize = os.path.getsize(dlDir + fname)
            with open('linksys_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, hw_rev, fver, furl, fdate, fsize, sha1, md5])
    except BaseException as ex:
        traceback.print_exc()
        import pdb
        pdb.set_trace()
def main():
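    # Re-derive size/sha1/md5/date for every row of au_dlink_filelist.csv
    # (skipping PDFs) and write au_dlink_filelist2.csv.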
    with open('au_dlink_filelist2.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow([
            'model', 'rev', 'fw_ver', 'fw_url', 'file_size', 'file_sha1',
            'file_md5', 'file_date'
        ])
        with open('au_dlink_filelist.csv', 'r') as fin:
            cr = csv.reader(fin)
            next(cr)
            for model, rev, fw_ver, fw_url in cr:
                if fw_url.split('.')[-1].lower() in ['pdf']:
                    continue
                fname = download_file(fw_url)
                file_sha1 = getFileSha1(fname)
                file_md5 = getFileMd5(fname)
                file_size = os.path.getsize(fname)
                file_date = get_ftp_date(fw_url)
                cw.writerow([
                    model, rev, fw_ver, fw_url, file_size, file_sha1, file_md5,
                    file_date
                ])
def download_file(model, fileName, fw_url):
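    # Simpler netgear downloader: no request timeout and no size check
    # against Content-Length (compare the variant above).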
    try:
        resp = requests.get(url=fw_url, stream=True)
        if 'Content-Length' in resp.headers:
            fileSize = int(resp.headers['Content-Length'])
            print('fileSize=', fileSize)
        else:
            fileSize = None
        try:
            fw_ver = re.search(r'\d+(\.\d+)+', fileName).group(0)
        except AttributeError:
            fw_ver = ''
        fileName = os.path.basename(urllib.parse.urlsplit(fw_url).path)
        print('fileName=', fileName)
        if 'Last-Modified' in resp.headers:
            fw_date = resp.headers['Last-Modified']
            fw_date = parse_date(fw_date)
        else:
            fw_date = None
        if os.path.isfile(dlDir + fileName) \
                and fileSize == os.path.getsize(dlDir + fileName):
            print('already downloaded: ', fileName)
        else:
            print('start downloading: ', fw_url)
            with open(dlDir + fileName + '.downloading', 'wb') as fout:
                for chunk in resp.iter_content(8192):
                    fout.write(chunk)
            os.rename(dlDir + fileName + '.downloading', dlDir + fileName)
            print('finished downloading: ', fw_url)
        sha1 = getFileSha1(dlDir + fileName)
        md5 = getFileMd5(dlDir + fileName)
        fileSize = os.path.getsize(dlDir + fileName)
        with open('netgear_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([
                model, fw_ver, fileName, fw_url, fw_date, fileSize, sha1, md5
            ])
    except BaseException as ex:
        traceback.print_exc()
def main():
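    # Re-hash every downloaded netgear file, report mismatches against
    # the recorded values, and write the corrected netgear_filelist2.csv.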
    dlDir='./output/netgear/downloadcenter.netgear.com_form_submit/'
    with open('netgear_filelist.csv', 'r') as fin:
        cr = csv.reader(fin)
        next(cr)
        rows = [(model, fver, fname, furl, parse_date(fdate), int(fsize),
                 sha1, md5)
                for model, fver, fname, furl, fdate, fsize, sha1, md5 in cr]

    with open('netgear_filelist2.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow(['model', 'fw_ver', 'fileName', 'fw_url', 'fw_date',
                     'fileSize', 'sha1', 'md5'])
        for model, fver, fname, furl, fdate, fsize, sha1, md5 in rows:
            fsizeC = os.path.getsize(dlDir + fname)
            sha1C = getFileSha1(dlDir + fname)
            md5C = getFileMd5(dlDir + fname)
            if fsizeC != fsize:
                print('"%s" wrong fileSize(%s), correct=%s' % (fname, fsize, fsizeC))
            elif sha1C != sha1:
                print('"%s" wrong sha1(%s), correct=%s' % (fname, sha1, sha1C))
            elif md5C != md5:
                print('"%s" wrong md5(%s), correct=%s' % (fname, md5, md5C))
            cw.writerow([model, fver, fname, furl, fdate, fsizeC, sha1C, md5C])
def main():
    # Deduplicate the ftp.dlink.eu list: rows holds parsed CSV entries of
    # [model, fw_ver, fw_url, size, date, sha1, md5].  The code that loads
    # rows was lost in extraction, and the loop header below is
    # reconstructed from how the fragment uses its variables.
    for index, row in enumerate(rows):
        model, fw_ver, ftpurl, file_size, file_date, file_sha1, file_md5 = row
        try:
            # find another row for the same url that already has a sha1
            solid_index = next(i for i, r in enumerate(rows)
                               if r[2] == ftpurl and r[5] and i != index)
            print('Merge "%s" "%s"' % (model, fw_ver))
            solid_file_size, solid_date, file_sha1, file_md5 = \
                rows[solid_index][3:7]
            assert file_sha1
            if file_date is None:
                file_date = solid_date
            rows[solid_index] = [model, fw_ver, ftpurl, solid_file_size,
                                 file_date, file_sha1, file_md5]
            del rows[index]
        except StopIteration:
            print('Download %s' % ftpurl)
            try:
                fname = download_file(ftpurl)
            except urllib.error.URLError:
                print('Failed to download ', ftpurl)
                continue
            file_sha1 = getFileSha1(fname)
            file_md5 = getFileMd5(fname)
            file_size = os.path.getsize(fname)
            rows[index][3] = file_size
            if rows[index][4] is None:
                rows[index][4] = get_ftp_date(ftpurl)
            rows[index][5] = file_sha1
            rows[index][6] = file_md5

    rows.sort(key=lambda r: (r[0].lower(), r[1].lower(), r[2].lower()))
    with open('dlink_ftp.dlink.eu_filelist3.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow(['model', 'fw_ver', 'fw_url', 'size', 'date', 'sha1', 'md5'])
        cw.writerows(rows)
def parse_download_page(page_url):
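    # Scrape a Sitecom product page: for each firmware download item,
    # parse date/version/description, fetch the file, and insert a row
    # into the postgres image table.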
    global prevTrail
    d = pq(url=page_url)
    trailStr = ''
    try:
        # probe for the product name; an IndexError means the page is gone
        d('h1.product-name')[0].text_content().strip()
    except IndexError:
        print('%s does NOT exist' % page_url)
        return
    model = d('h2.product-number')[0].text_content().strip()
    for idx, item in enumerate(d('li.download-item'), start=getStartIdx()):
        try:
            title = item.cssselect('h3.thin')[0].text_content()
        except IndexError:
            continue
        if 'firmware' not in title.lower():
            continue

        rel_date = item.cssselect('small')[0].text_content()
        # 'Publication date \r: 18 January 2016'
        rel_date = rel_date.split('\r')[1].strip(': ')
        # '18 January 2016'
        rel_date = datetime.strptime(rel_date, '%d %B %Y')

        fw_ver = item.cssselect('.download-text-title')[0].text_content()
        # 'Version number\r: 2.11'
        fw_ver = fw_ver.split('\r')[-1].strip(': ')
        # '2.11'
        fw_ver = re.search(r'\d+(\.\d+)*', fw_ver).group(0)

        fw_desc = d('.download-item div')[0].text_content().strip()
        # 'Changed:\n\n\n\tAdd timeout to check DNS alive\n\tAdd procedure to
        # verify ipv4 and ipv6 on ppp session"

        fw_url = item.cssselect('a')[0].attrib['href']
        try:
            uprint('start to download %s' % fw_url)
            local_file_path = cookie_friendly_download(page_url,
                                                       fw_url,
                                                       store_dir,
                                                       timeout=1000)
        except urllib.error.HTTPError as ex:
            print(ex)
            continue
        except OSError as ex:
            if ex.errno == 28:
                print(ex)
                print('[Errno 28] No space left on device')
                break
            raise
        except Exception as ex:
            ipdb.set_trace()
            traceback.print_exc()
            continue
        file_sha1 = getFileSha1(local_file_path)
        file_md5 = getFileMd5(local_file_path)
        file_size = path.getsize(local_file_path)
        uprint('file_path="%s", file_size=%s, file_sha1=%s' %
               (local_file_path, file_size, file_sha1))

        trailStr = str(prevTrail + [idx])
        psql(
            "INSERT INTO image"
            "(brand, model,"
            " rel_date, version, description,"
            " filename, file_sha1, hash, file_size,"
            " page_url, file_url, tree_trail) VALUES"
            "(    %s,   %s, "
            "       %s,      %s,          %s,"
            "       %s,        %s,   %s,        %s,"
            "       %s,       %s,         %s)",
            ('Sitecom', model, rel_date, fw_ver, fw_desc, local_file_path,
             file_sha1, file_md5, file_size, page_url, fw_url, trailStr))
def main():
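    # Linksys.sqlite3 driver: download rows missing a sha1, record
    # sha1/size, and mirror each file to the GRID ftp host.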
    startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    global conn
    with sqlite3.connect("Linksys.sqlite3") as conn:
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT brand,model,revision,file_title,href,file_sha1 FROM TFiles " "LIMIT -1 OFFSET %d" % startIdx
        ).fetchall()
        for idx, row in enumerate(rows, startIdx):
            brand, model, revision, fileTitle, url, fileSha1 = row
            print("idx= %d, fileSha1=%s" % (idx, fileSha1))
            if fileSha1:
                uprint('"%s" already downloaded, bypass!' % fileTitle)
                continue
            if not url:
                continue
            fname = urlFileName(url)
            if not fname:
                fname = safeFileName(fileTitle)
            uprint("url=" + url)
            uprint('download "%s" as "%s"' % (fileTitle, fname))
            fname = path.join(dlDir, fname)
            try:
                downloadFile(url, fname)
            except urllib.error.HTTPError as ex:
                print(ex)
                continue
            except OSError as ex:
                if ex.errno == 28:
                    print(ex)
                    print("[Errno 28] No space left on device")
                    break
                raise
            except Exception as ex:
                import pdb
                pdb.set_trace()
                import traceback
                traceback.print_exc()
                print(ex)
                continue

            fileSha1 = getFileSha1(fname)
            fileSize = path.getsize(fname)
            print('sha1="%s" for "%s"' % (fileSha1, fname))
            csr.execute(
                "UPDATE TFiles SET file_sha1=:fileSha1, file_size=:fileSize"
                " WHERE brand=:brand AND model=:model AND revision=:revision"
                " AND file_title=:fileTitle",
                locals(),
            )
            conn.commit()
            print(
                "UPDATE fileSha1=%(fileSha1)s, fileSize=%(fileSize)d"
                ' WHERE "%(brand)s", "%(model)s", "%(revision)s", '
                '"%(fileTitle)s" ' % locals()
            )
            with ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword) as ftp:
                ftp.upload(fname, path.basename(fname))
                print('uploaded "%s" to ftp://%s/' % (path.basename(fname), ftpHostName))
            os.remove(fname)