def main():
    with open('ca_dlink_filelist.csv', 'r') as fin:
        cr = csv.reader(fin)
        next(cr)  # skip header row
        rows = [[model, rev, ver, url, date]
                for model, rev, ver, url, date in cr]
    for index, row in enumerate(rows):
        model, rev, fw_ver, ftp_url, date = row
        try:
            fname = download_file(ftp_url)
            sha1 = getFileSha1(fname)
            md5 = getFileMd5(fname)
            fsize = os.path.getsize(fname)
            rows[index] = [model, rev, fw_ver, ftp_url, date, fsize, sha1, md5]
        except urllib.error.URLError:
            print('Failed:', ftp_url)
            rows[index] = [model, rev, fw_ver, ftp_url, date, -1, '', '']
        except ValueError:
            print('Failed wrong url "%s"' % ftp_url)
            rows[index] = [model, rev, fw_ver, ftp_url, date, -1, '', '']
    with open('ca_dlink_filelist2.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow(['model', 'rev', 'fw_ver', 'ftp_url', 'date',
                     'size', 'sha1', 'md5'])
        cw.writerows(rows)

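# The scripts below all call getFileSha1()/getFileMd5(), which are defined
# elsewhere in the repo. A minimal sketch of what they are assumed to do,
# hashing in chunks so multi-hundred-MB firmware images never load into RAM:
import hashlib

def getFileSha1(filename):
    """Hex SHA-1 digest of a file, read in 64 KiB chunks."""
    h = hashlib.sha1()
    with open(filename, 'rb') as fin:
        for chunk in iter(lambda: fin.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()

def getFileMd5(filename):
    """Hex MD5 digest of a file, read in 64 KiB chunks."""
    h = hashlib.md5()
    with open(filename, 'rb') as fin:
        for chunk in iter(lambda: fin.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()
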
def download_ftp_file(model, fver, fdate, furl):
    try:
        host = ftputil.FTPHost(urlsplit(furl).hostname,
                               "anonymous", "*****@*****.**")
    except ftputil.error.FTPOSError as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        return
    try:
        host.keep_alive()
        if not fdate:
            fdate = host.path.getmtime(urlsplit(furl).path)
        fsize = host.path.getsize(urlsplit(furl).path)
        needDownload, fname = determine_ftp_filename(host, furl)
        if needDownload:
            fsize = host.path.getsize(urlsplit(furl).path)
            print('Start download %s -> "%s" %d bytes' % (furl, fname, fsize))
            host.download(urlsplit(furl).path, dlDir + fname)
            print('Finished download "%s" -> %s %d bytes' % (furl, fname, fsize))
        else:
            print('Already downloaded %s' % furl)
        md5 = getFileMd5(dlDir + fname)
        sha1 = getFileSha1(dlDir + fname)
        fsize = os.path.getsize(dlDir + fname)
        with open('zyxel_us_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, fver, fname, furl, fdate, fsize, sha1, md5])
    except TimeoutError as ex:
        print('Timeout Error ex=%s, furl=%s' % (ex, furl))
    except BaseException as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        traceback.print_exc()
    finally:
        host.close()

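# determine_ftp_filename() is defined elsewhere; judging from its call site
# above it returns (needDownload, fname). A plausible sketch, assuming the
# name comes from the URL path and a size match against the local copy is
# what decides whether to re-download:
def determine_ftp_filename(host, furl):
    fname = os.path.basename(urlsplit(furl).path)
    remote_size = host.path.getsize(urlsplit(furl).path)
    local = dlDir + fname
    if os.path.isfile(local) and os.path.getsize(local) == remote_size:
        return False, fname  # same size on disk: assume already downloaded
    return True, fname
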
def main():
    global startTrail, prevTrail, conn
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
        conn = sqlite3.connect('arris.sqlite3')
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,file_url,file_sha1 FROM TFiles ORDER BY id "
            "LIMIT -1 OFFSET %d" % startIdx).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId, file_url, file_sha1 = row
            if not file_url:
                continue
            if file_sha1:
                continue
            uprint('idx=%d' % idx)
            try:
                local_file = downloadFile(file_url, "Content-Disposition")
            except TypeError:
                continue
            file_sha1 = getFileSha1(local_file)
            file_size = path.getsize(local_file)
            csr.execute(
                "UPDATE TFiles SET file_sha1=:file_sha1,file_size=:file_size"
                " WHERE id = :devId", locals())
            conn.commit()
            ftp = ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword)
            uprint('upload to GRID')
            ftp.upload(local_file, path.basename(local_file))
            ftp.close()
            os.remove(local_file)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()

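# uprint() is a shared helper (not shown in this section) used wherever model
# names or file titles may contain characters the console encoding cannot
# represent. A minimal stand-in under that assumption:
def uprint(s):
    # Replace unencodable characters instead of raising UnicodeEncodeError.
    enc = sys.stdout.encoding or 'utf-8'
    print(s.encode(enc, errors='replace').decode(enc))
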
def epilog():
    sha1 = getFileSha1(localstor + fname)
    md5 = getFileMd5(localstor + fname)
    with open('jp_dlink_filelist.csv', 'a') as fout:
        cw = csv.writer(fout)
        cw.writerow([model, fw_ver, fw_url, rel_date, fsize, sha1, md5])

def downloadFile(ftp, model, rfile):
    global prevTrail, conn
    csr = conn.cursor()
    try:
        fname = ftp.path.basename(rfile)
        epoch = ftp.path.getmtime(rfile)
        fwDate = datetime.fromtimestamp(epoch)
        fileSize = ftp.path.getsize(rfile)
        lfile = path.join(dlDir, fname)
        uprint('download "%s"' % fname)
        ftp.download(rfile, lfile)
        fileSha1 = getFileSha1(lfile)
        fileUrl = "ftp://" + zyxel_ftp + "/" + rfile
        modelName = model.replace('_', ' ')
        trailStr = str(prevTrail)
        csr.execute(
            "INSERT OR REPLACE INTO TFiles (model,"
            "fw_date,file_size,file_sha1,file_url,tree_trail) VALUES "
            "(:modelName,:fwDate,:fileSize,:fileSha1,:fileUrl,:trailStr)",
            locals())
        conn.commit()
        uprint('UPSERT fileSha1=%(fileSha1)s, fileSize=%(fileSize)s'
               ' model="%(modelName)s", trail=%(trailStr)s' % locals())
        with ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword) as grid:
            grid.upload(lfile, path.basename(lfile))
        uprint('uploaded "%s" to ftp://%s/'
               % (path.basename(lfile), ftpHostName))
        os.remove(lfile)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()

def download(ftpurl):
    try:
        fname = ftpurl.split('/')[-1]
        if fname.lower() in ['thumbs.db']:
            return
        if fname.split('.')[-1].lower() in ['pdf', 'txt']:
            return
        with ftputil.FTPHost('files.dlink.com.au', 'anonymous', '') as host:
            fsize = host.path.getsize(ftpurl)
            fdate = host.path.getmtime(ftpurl)
            if os.path.isfile(localstor + fname) \
                    and os.path.getsize(localstor + fname) == fsize:
                print('%(fname)s already exists' % locals())
                return
            print('Start downloading %(ftpurl)s' % locals())
            host.download(ftpurl, localstor + fname)
            print('Finished downloading %(ftpurl)s' % locals())
        file_sha1 = getFileSha1(localstor + fname)
        file_md5 = getFileMd5(localstor + fname)
        with open('au_dlink_ftp_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([ftpurl, fsize, fdate, file_sha1, file_md5])
    except Exception as ex:
        print(ex)

def download_file(model, fdesc, furl):  # noqa
    try:
        with closing(requests.get(url=furl, timeout=30, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            else:
                fsize = None
            if not fsize:
                print('Unknown size resp.url=%s, headers=%s'
                      % (resp.url, resp.headers))
                with open('netgear_cn_filelist.csv', 'a') as fout:
                    cw = csv.writer(fout)
                    cw.writerow([model, "", "", furl, None, fsize,
                                 "unknown", "unknown"])
                return
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            else:
                fdate = None
            try:
                fver = re.search(r'\d+(\.\d+)+', fdesc).group(0)
            except AttributeError:
                fver = ''
            needDownload, fname = determine_filename(resp)
            if not needDownload:
                print('Already downloaded: ', fname)
            else:
                print('Start downloading (%d bytes): %s' % (fsize, furl))
                with open(dlDir + fname, 'wb') as fout:
                    fout.write(b'place_holder0')  # reserve the final filename
                with open(dlDir + fname + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive chunks
                            fout.write(chunk)
                try:
                    os.replace(dlDir + fname + '.downloading', dlDir + fname)
                except BaseException as ex:
                    print(ex)
                    print('"%s" not found' % (dlDir + fname + '.downloading'))
                print('Finished downloading: %s' % furl)
            sha1 = getFileSha1(dlDir + fname)
            md5 = getFileMd5(dlDir + fname)
            if fsize and os.path.getsize(dlDir + fname) != fsize:
                print('Content-Length(%s) different to real fsize %s'
                      % (fsize, os.path.getsize(dlDir + fname)))
                fsize = os.path.getsize(dlDir + fname)
            with open('netgear_cn_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, fver, fname, furl, fdate, fsize, sha1, md5])
    except requests.exceptions.ConnectionError:
        print('ConnectionError: %s' % furl)
    except requests.exceptions.ReadTimeout:
        print('ReadTimeout: %s' % furl)
    except BaseException as ex:
        traceback.print_exc()

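# determine_filename() above is assumed to pick a local filename from the HTTP
# response and decide whether a download is needed. A hedged sketch: prefer
# Content-Disposition, fall back to the URL basename, and skip the download
# when a local file of the advertised size already exists.
def determine_filename(resp):
    cd = resp.headers.get('Content-Disposition', '')
    if 'filename=' in cd:
        fname = cd.split('filename=')[-1].strip('"; ')
    else:
        fname = os.path.basename(urlsplit(resp.url).path)
    fsize = int(resp.headers.get('Content-Length', 0))
    if os.path.isfile(dlDir + fname) and os.path.getsize(dlDir + fname) == fsize:
        return False, fname
    return True, fname
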
def main():
    global startTrail, prevTrail, conn
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
        conn = sqlite3.connect('huawei_consumer_search_by_keyword.sqlite3')
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,file_name,file_url,file_sha1,file_size FROM TFiles"
            " ORDER BY id LIMIT -1 OFFSET %d" % startIdx).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId, file_name, file_url, file_sha1, file_size = row
            if not file_url:
                continue
            if file_sha1:
                continue
            if 'Android' in file_name:
                continue
            if 'Ascend' in file_name:
                continue
            if 'Honor' in file_name:
                continue
            if 'Open Source' in file_name:
                continue
            if 'opensource' in file_name.lower():
                continue
            uprint('idx=%d, file_name="%s", file_size=%d'
                   % (idx, file_name, file_size))
            model = guessModel(file_name)
            uprint('model="%s"' % model)
            if not model:
                continue
            fw_ver = guessVersion(file_name)
            uprint('fw_ver="%s"' % fw_ver)
            if not fw_ver:
                continue
            try:
                local_file = downloadFile(file_url, "Content-Disposition")
            except TypeError:
                continue
            file_sha1 = getFileSha1(local_file)
            file_size = path.getsize(local_file)
            csr.execute(
                "UPDATE TFiles SET file_sha1=:file_sha1"
                ",file_size=:file_size"
                ",model=:model"
                ",fw_ver=:fw_ver"
                " WHERE id = :devId", locals())
            conn.commit()
            ftp = ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword)
            uprint('upload to GRID')
            ftp.upload(local_file, path.basename(local_file))
            ftp.close()
            os.remove(local_file)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()

def download_file(model, fver, text, furl):  # noqa
    try:
        with closing(requests.get(url=furl, timeout=30, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            else:
                fsize = None
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            else:
                fdate = None
            fname = os.path.basename(urlsplit(furl).path)
            alreadyDownloaded = False
            if os.path.exists(dlDir + fname) \
                    and os.path.getsize(dlDir + fname) == fsize:
                alreadyDownloaded = True
            elif os.path.exists(dlDir + fname) \
                    and os.path.getsize(dlDir + fname) != fsize:
                # bump a _N suffix until the target filename no longer exists
                while os.path.exists(dlDir + fname):
                    ftitle, fext = os.path.splitext(fname)
                    m = re.search(r'(.+)_(\d+)', ftitle)
                    if m:
                        ftitle = m.group(1) + '_' + str(int(m.group(2)) + 1)
                        fname = ftitle + fext
                    else:
                        fname = ftitle + "_1" + fext
            if not alreadyDownloaded:
                print('Start downloading %s -> "%s" %d bytes'
                      % (furl, fname, fsize))
                with open(dlDir + fname, 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive chunks
                            fout.write(chunk)
                print('Finished downloading %s -> "%s" %d bytes'
                      % (furl, fname, fsize))
            else:
                print('Already downloaded %s' % furl)
            md5 = getFileMd5(dlDir + fname)
            sha1 = getFileSha1(dlDir + fname)
            fsize = os.path.getsize(dlDir + fname)
            with open('tenda_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, fver, fname, furl, fdate, fsize, sha1, md5])
    except TimeoutError as ex:
        print('TimeoutError ex=%s, furl=%s' % (ex, furl))
    except BaseException as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        traceback.print_exc()

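# parse_date() is imported from elsewhere in these scripts. It is fed both
# HTTP Last-Modified strings and datetimes round-tripped through CSV, so a
# stand-in assuming python-dateutil is available would cover both forms:
from dateutil import parser as _dateparser

def parse_date(s):
    # e.g. 'Wed, 21 Oct 2015 07:28:00 GMT' or '2016-01-18 00:00:00'
    return _dateparser.parse(s)
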
def epilog(fsize, fdate):
    if not os.path.isfile(localstor + fname):
        sha1 = None
        md5 = None
    else:
        sha1 = getFileSha1(localstor + fname)
        md5 = getFileMd5(localstor + fname)
    with open('us_dlink_filelist.csv', 'a') as fout:
        cw = csv.writer(fout)
        cw.writerow([model, rev, fw_ver, fw_url, fsize, fdate, sha1, md5])

def download(session, url, model, filename, fw_ver, fdate):
    from urllib import parse
    fname = filename
    try:
        doccont = session.get(
            url=url,
            headers={'Referer': 'http://tsd.dlink.com.tw/downloads2008detailgo.asp',
                     'Upgrade-Insecure-Requests': '1'},
            stream=True, timeout=30)
        fw_url = doccont.url
        # print('fw_url=', fw_url)
        docParams = parse.parse_qs(parse.urlsplit(doccont.url).query)
        # print('docParams=', docParams)
        if 'fileName' in docParams:
            fname = docParams['fileName'][0]
        else:
            fname = os.path.basename(parse.urlsplit(fw_url).path)
        if 'fileSize' in docParams:
            fsize = int(float(docParams['fileSize'][0]))
            # print('fsize=', fsize)
        if 'Content-Length' in doccont.headers:
            fsize = int(doccont.headers['Content-Length'])
            # print('fsize=Content-Length=', fsize)
        if 'Content-Disposition' in doccont.headers:
            fname = doccont.headers['Content-Disposition'] \
                .split(';', 1)[1].split('=', 1)[1]
        if 'fsize' in locals():
            if os.path.isfile(localstor + fname) \
                    and os.path.getsize(localstor + fname) == fsize:
                # print('"%s" already exists' % (localstor + fname))
                return
        print('Start Downloading "%s" to "%s"' % (doccont.url, localstor + fname))
        with open(localstor + fname, 'wb') as fout:
            for chunk in doccont.iter_content(4096):
                fout.write(chunk)
        fsize = os.path.getsize(localstor + fname)
        print('Finished Downloading "%s" to "%s", fsize=%d'
              % (doccont.url, localstor + fname, fsize))
        sha1 = getFileSha1(localstor + fname)
        md5 = getFileMd5(localstor + fname)
        with open('tsd_dlink_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, '', fw_ver, fw_url, fdate, fsize, sha1, md5])
    except socket.timeout:
        print('socket timeout error, url=', url)
    except requests.exceptions.Timeout:
        print('requests timeout error, url=', url)
    except BaseException as ex:
        print('unknown error, url=', url)
        traceback.print_exc()
        print(ex)

def download_file(model, desc, fw_url):
    try:
        with closing(requests.get(url=fw_url, timeout=10, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fileSize = int(resp.headers['Content-Length'])
                print('fileSize=', fileSize)
            else:
                fileSize = None
            try:
                fw_ver = re.search(r'\d+(\.\d+)+', desc).group(0)
            except AttributeError:
                fw_ver = ''
            fileName = os.path.basename(urlsplit(fw_url).path)
            print('fileName=', fileName)
            if not fileName:
                print('No fileName, url=', fw_url)
                return
            if 'Last-Modified' in resp.headers:
                fw_date = resp.headers['Last-Modified']
                fw_date = parse_date(fw_date)
            else:
                fw_date = None
            if os.path.isfile(dlDir + fileName) \
                    and fileSize == os.path.getsize(dlDir + fileName):
                print('already downloaded: ', fileName)
            else:
                print('start downloading: ', fw_url)
                with open(dlDir + fileName + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive chunks
                            fout.write(chunk)
                try:
                    os.rename(dlDir + fileName + '.downloading', dlDir + fileName)
                except FileNotFoundError:
                    print('"%s" not found' % (dlDir + fileName + '.downloading'))
                print('finished downloading: ', fw_url)
            sha1 = getFileSha1(dlDir + fileName)
            md5 = getFileMd5(dlDir + fileName)
            if fileSize and os.path.getsize(dlDir + fileName) != fileSize:
                print('Content-Length(%s) different to real fileSize %s'
                      % (fileSize, os.path.getsize(dlDir + fileName)))
                fileSize = os.path.getsize(dlDir + fileName)
            with open('netgear_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, fw_ver, fileName, fw_url, fw_date,
                             fileSize, sha1, md5])
    except BaseException as ex:
        traceback.print_exc()
        import pdb
        pdb.set_trace()

def main():
    startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    global conn
    with sqlite3.connect('belkin.sqlite3') as conn:
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,download_url,page_url,file_sha1"
            " FROM TFiles LIMIT -1 OFFSET %d" % startIdx).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId, url, page_url, fileSha1 = row
            if fileSha1 or not url:
                continue
            uprint('idx=%s, url=%s' % (idx, url))
            url = safeUrl(url)
            uprint('url=' + url)
            fname = urlFileName(url)
            uprint('download "%s"' % fname)
            fname = path.join(dlDir, fname)
            try:
                downloadFile(url, fname)
            except urllib.error.HTTPError as ex:
                print(ex)
                continue
            except OSError as ex:
                if ex.errno == 28:
                    print(ex)
                    print('[Errno 28] No space left on device')
                    break
            except Exception as ex:
                ipdb.set_trace()
                traceback.print_exc()
                continue
            fileSha1 = getFileSha1(fname)
            fileSize = path.getsize(fname)
            print('sha1="%s" for "%s"' % (fileSha1, fname))
            csr.execute("UPDATE TFiles SET file_sha1=:fileSha1,"
                        " file_size=:fileSize WHERE id=:devId", locals())
            conn.commit()
            uprint('UPDATE fileSha1=%(fileSha1)s, fileSize=%(fileSize)s'
                   ' WHERE id="%(devId)s"' % locals())
            with ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword) as ftp:
                ftp.upload(fname, path.basename(fname))
            uprint('uploaded "%s" to ftp://%s/'
                   % (path.basename(fname), ftpHostName))
            os.remove(fname)

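# Several scripts here call a 2-argument downloadFile(url, local_path) and
# handle urllib.error.HTTPError around it (other scripts use a different
# variant). A minimal sketch of that form, using only the standard library;
# the temp-file rename mirrors the '.downloading' idiom used elsewhere:
import urllib.request

def downloadFile(url, fname):
    # Stream to a temp name first so a crash never leaves a truncated file
    # under the final name.
    tmp = fname + '.downloading'
    with urllib.request.urlopen(url, timeout=30) as resp, open(tmp, 'wb') as fout:
        while True:
            chunk = resp.read(65536)
            if not chunk:
                break
            fout.write(chunk)
    os.replace(tmp, fname)
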
def upsert_psql(file_name, fw_url, model, version, rel_date):
    # Connect outside the try block so the finally clause never sees an
    # unbound conn when the connection itself fails.
    conn = psycopg2.connect(database="firmware", user="******",
                            password="******", host="127.0.0.1")
    try:
        cur = conn.cursor()
        brand_id = 1
        file_sha1 = getFileSha1(file_name)
        file_md5 = getFileMd5(file_name)
        file_size = os.path.getsize(file_name)
        cur.execute(
            "INSERT INTO image"
            " (filename, brand, model, version, rel_date, brand_id,"
            "  file_size, hash, file_sha1, file_url) VALUES"
            " (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            (file_name, 'Avm', model, version, rel_date, brand_id,
             file_size, file_md5, file_sha1, fw_url))
        conn.commit()
    finally:
        conn.close()

def main():
    with open('dlink_ftp.dlink.eu_filelist.csv', 'w') as fout:
        cw = csv.writer(fout, dialect='excel')
        cw.writerow(['ftp_url', 'file_size', 'file_date', 'model',
                     'file_sha1', 'file_md5'])
        with open('dlink_ftp.dlink.eu_filelist.txt', 'r') as fin:
            for line in fin:
                line = line.strip()
                if not line:
                    continue
                ftpurl, fsize, fdate = line.split('\t', 2)
                fdate = datetime.fromtimestamp(float(fdate))
                fname = 'output/D-Link/ftp.dlink.eu/' + ftpurl.split('/')[-1]
                sha1 = getFileSha1(fname)
                md5 = getFileMd5(fname)
                fsize = path.getsize(fname)
                model = get_model_from_ftp_url(ftpurl)
                cw.writerow([ftpurl, fsize, fdate, model, sha1, md5])
                print('%s,%s,%s,%s' % (ftpurl, fsize, fdate, model))

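# get_model_from_ftp_url() is defined elsewhere. ftp.dlink.eu organizes
# firmware under per-model directories, so one plausible reading, purely an
# assumption, is "first path segment under the archive root":
def get_model_from_ftp_url(ftpurl):
    parts = urlsplit(ftpurl).path.strip('/').split('/')
    return parts[0] if parts else ''
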
def download_file(hw_rev, model, fver, fdate, furl):  # noqa
    try:
        with closing(requests.get(url=furl, timeout=10, stream=True)) as resp:
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            # Fall back to the URL basename, then let Content-Disposition
            # override it (the original clobbered the header-derived name).
            fname = os.path.basename(urlsplit(furl).path)
            if 'Content-Disposition' in resp.headers:
                fname = resp.headers['Content-Disposition']
                fname = fname.split(';')[-1].split('=')[-1].strip()
            fsize = None
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            if os.path.isfile(dlDir + fname) \
                    and fsize == os.path.getsize(dlDir + fname):
                print('already downloaded: ', fname)
            else:
                print('start downloading: ', furl)
                with open(dlDir + fname + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive chunks
                            fout.write(chunk)
                try:
                    os.rename(dlDir + fname + '.downloading', dlDir + fname)
                except FileNotFoundError:
                    print('"%s" not found' % (dlDir + fname + '.downloading'))
                print('finished downloading: ', furl)
            sha1 = getFileSha1(dlDir + fname)
            md5 = getFileMd5(dlDir + fname)
            if fsize and os.path.getsize(dlDir + fname) != fsize:
                print('Content-Length(%s) different to real fsize %s'
                      % (fsize, os.path.getsize(dlDir + fname)))
                fsize = os.path.getsize(dlDir + fname)
            with open('linksys_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, hw_rev, fver, furl, fdate, fsize, sha1, md5])
    except BaseException as ex:
        traceback.print_exc()
        import pdb
        pdb.set_trace()

def main():
    with open('au_dlink_filelist2.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow(['model', 'rev', 'fw_ver', 'fw_url', 'file_size',
                     'file_sha1', 'file_md5', 'file_date'])
        with open('au_dlink_filelist.csv', 'r') as fin:
            cr = csv.reader(fin)
            next(cr)  # skip header row
            for model, rev, fw_ver, fw_url in cr:
                if fw_url.split('.')[-1].lower() in ['pdf']:
                    continue
                fname = download_file(fw_url)
                file_sha1 = getFileSha1(fname)
                file_md5 = getFileMd5(fname)
                file_size = os.path.getsize(fname)
                file_date = get_ftp_date(fw_url)
                cw.writerow([model, rev, fw_ver, fw_url, file_size,
                             file_sha1, file_md5, file_date])

def download_file(model, fileName, fw_url):
    try:
        resp = requests.get(url=fw_url, stream=True)
        if 'Content-Length' in resp.headers:
            fileSize = int(resp.headers['Content-Length'])
            print('fileSize=', fileSize)
        else:
            fileSize = None
        try:
            fw_ver = re.search(r'\d+(\.\d+)+', fileName).group(0)
        except AttributeError:
            fw_ver = ''
        fileName = os.path.basename(urllib.parse.urlsplit(fw_url).path)
        print('fileName=', fileName)
        if 'Last-Modified' in resp.headers:
            fw_date = resp.headers['Last-Modified']
            fw_date = parse_date(fw_date)
        else:
            fw_date = None
        if os.path.isfile(dlDir + fileName) \
                and fileSize == os.path.getsize(dlDir + fileName):
            print('already downloaded: ', fileName)
        else:
            print('start downloading: ', fw_url)
            with open(dlDir + fileName + '.downloading', 'wb') as fout:
                for chunk in resp.iter_content(8192):
                    fout.write(chunk)
            os.rename(dlDir + fileName + '.downloading', dlDir + fileName)
            print('finished downloading: ', fw_url)
        sha1 = getFileSha1(dlDir + fileName)
        md5 = getFileMd5(dlDir + fileName)
        fileSize = os.path.getsize(dlDir + fileName)
        with open('netgear_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, fw_ver, fileName, fw_url, fw_date,
                         fileSize, sha1, md5])
    except BaseException as ex:
        traceback.print_exc()

def main():
    dlDir = './output/netgear/downloadcenter.netgear.com_form_submit/'
    with open('netgear_filelist.csv', 'r') as fin:
        cr = csv.reader(fin)
        next(cr)  # skip header row
        rows = [(model, fver, fname, furl, parse_date(fdate), int(fsize), sha1, md5)
                for model, fver, fname, furl, fdate, fsize, sha1, md5 in cr]
    with open('netgear_filelist2.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow(['model', 'fw_ver', 'fileName', 'fw_url', 'fw_date',
                     'fileSize', 'sha1', 'md5'])
        for model, fver, fname, furl, fdate, fsize, sha1, md5 in rows:
            fsizeC = os.path.getsize(dlDir + fname)
            sha1C = getFileSha1(dlDir + fname)
            md5C = getFileMd5(dlDir + fname)
            if fsizeC != fsize:
                print('"%s" wrong fileSize(%s), correct=%s' % (fname, fsize, fsizeC))
            elif sha1C != sha1:
                print('"%s" wrong sha1(%s), correct=%s' % (fname, sha1, sha1C))
            elif md5C != md5:
                print('"%s" wrong md5(%s), correct=%s' % (fname, md5, md5C))
            cw.writerow([model, fver, fname, furl, fdate, fsizeC, sha1C, md5C])

            # Excerpt: if another row ("solid") already carries size/hash data
            # for this ftpurl, merge into it; otherwise download the file.
            solid_index = next(i for i, _ in enumerate(rows)
                               if _[2] == ftpurl and _[5] and i != index)
            print('Merge "%s" "%s"' % (model, fw_ver))
            solid_file_size, solid_date, file_sha1, file_md5 = \
                rows[solid_index][3:7]
            assert file_sha1
            if file_date is None:
                file_date = solid_date
            rows[solid_index] = (model, fw_ver, ftpurl, solid_file_size,
                                 file_date, file_sha1, file_md5)
            del rows[index]
        except StopIteration:
            print('Download %s' % ftpurl)
            try:
                fname = download_file(ftpurl)
            except urllib.error.URLError:
                print('Failed to download ', ftpurl)
                continue
            file_sha1 = getFileSha1(fname)
            file_md5 = getFileMd5(fname)
            file_size = os.path.getsize(fname)
            rows[index][3] = file_size
            if rows[index][4] is None:
                rows[index][4] = get_ftp_date(ftpurl)  # keep the result
            rows[index][5] = file_sha1
            rows[index][6] = file_md5
    rows.sort(key=lambda r: (r[0].lower(), r[1].lower(), r[2].lower()))
    with open('dlink_ftp.dlink.eu_filelist3.csv', 'w') as fout:
        cw = csv.writer(fout)
        cw.writerow(['model', 'fw_ver', 'fw_url', 'size', 'date', 'sha1', 'md5'])
        cw.writerows(rows)
except Exception as ex:
    traceback.print_exc()

def parse_download_page(page_url):
    global prevTrail
    d = pq(url=page_url)
    trailStr = ''
    try:
        d('h1.product-name')[0].text_content().strip()
    except IndexError:
        print('%s does NOT exist' % page_url)
        return
    model = d('h2.product-number')[0].text_content().strip()
    for idx, item in enumerate(d('li.download-item'), start=getStartIdx()):
        try:
            title = item.cssselect('h3.thin')[0].text_content()
        except IndexError:
            continue
        if 'firmware' not in title.lower():
            continue
        rel_date = item.cssselect('small')[0].text_content()
        # 'Publication date \r: 18 January 2016'
        rel_date = rel_date.split('\r')[1].strip(': ')  # '18 January 2016'
        rel_date = datetime.strptime(rel_date, '%d %B %Y')
        fw_ver = item.cssselect('.download-text-title')[0].text_content()
        # 'Version number\r: 2.11'
        fw_ver = fw_ver.split('\r')[-1].strip(': ')  # '2.11'
        fw_ver = re.search(r'\d+(\.\d+)*', fw_ver).group(0)
        fw_desc = d('.download-item div')[0].text_content().strip()
        # 'Changed:\n\n\n\tAdd timeout to check DNS alive\n\tAdd procedure to
        #  verify ipv4 and ipv6 on ppp session'
        fw_url = item.cssselect('a')[0].attrib['href']
        try:
            uprint('start to download %s' % fw_url)
            local_file_path = cookie_friendly_download(page_url, fw_url,
                                                       store_dir, timeout=1000)
        except urllib.error.HTTPError as ex:
            print(ex)
            continue
        except OSError as ex:
            if ex.errno == 28:
                print(ex)
                print('[Errno 28] No space left on device')
                break
        except Exception as ex:
            ipdb.set_trace()
            traceback.print_exc()
            continue
        file_sha1 = getFileSha1(local_file_path)
        file_md5 = getFileMd5(local_file_path)
        file_size = path.getsize(local_file_path)
        uprint('file_path="%s", file_size=%s, file_sha1=%s'
               % (local_file_path, file_size, file_sha1))
        trailStr = str(prevTrail + [idx])
        psql(
            "INSERT INTO image"
            "(brand, model,"
            " rel_date, version, description,"
            " filename, file_sha1, hash, file_size,"
            " page_url, file_url, tree_trail) VALUES"
            "( %s, %s,"
            "  %s, %s, %s,"
            "  %s, %s, %s, %s,"
            "  %s, %s, %s)",
            ('Sitecom', model, rel_date, fw_ver, fw_desc,
             local_file_path, file_sha1, file_md5, file_size,
             page_url, fw_url, trailStr))

def main():
    startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    global conn
    with sqlite3.connect("Linksys.sqlite3") as conn:
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT brand,model,revision,file_title,href,file_sha1 FROM TFiles "
            "LIMIT -1 OFFSET %d" % startIdx).fetchall()
        for idx, row in enumerate(rows, startIdx):
            brand, model, revision, fileTitle, url, fileSha1 = row
            print("idx= %d, fileSha1=%s" % (idx, fileSha1))
            if fileSha1:
                uprint('"%s" already downloaded, bypass!' % fileTitle)
                continue
            if not url:
                continue
            fname = urlFileName(url)
            if not fname:
                fname = safeFileName(fileTitle)
            uprint("url=" + url)
            uprint('download "%s" as "%s"' % (fileTitle, fname))
            fname = path.join(dlDir, fname)
            try:
                downloadFile(url, fname)
            except urllib.error.HTTPError as ex:
                print(ex)
                continue
            except OSError as ex:
                if ex.errno == 28:
                    print(ex)
                    print("[Errno 28] No space left on device")
                    break
            except Exception as ex:
                import pdb
                pdb.set_trace()
                import traceback
                traceback.print_exc()
                print(ex)
                continue
            fileSha1 = getFileSha1(fname)
            fileSize = path.getsize(fname)
            print('sha1="%s" for "%s"' % (fileSha1, fname))
            csr.execute(
                "UPDATE TFiles SET file_sha1=:fileSha1, file_size=:fileSize"
                " WHERE brand=:brand AND model=:model AND revision=:revision"
                " AND file_title=:fileTitle", locals())
            conn.commit()
            print('UPDATE fileSha1=%(fileSha1)s, fileSize=%(fileSize)d'
                  ' WHERE "%(brand)s", "%(model)s", "%(revision)s",'
                  ' "%(fileTitle)s"' % locals())
            with ftputil.FTPHost(ftpHostName, ftpUserName, ftpPassword) as ftp:
                ftp.upload(fname, path.basename(fname))
                print('uploaded "%s" to ftp://%s/'
                      % (path.basename(fname), ftpHostName))
            os.remove(fname)