def download_ftp_file(model, fver, fdate, furl):
    """Download one firmware file from an FTP URL and append its metadata
    row to zyxel_us_filelist.csv.

    model/fver/fdate come from the scraper; furl is the ftp:// URL.
    When fdate is falsy it falls back to the file's FTP mtime.
    Hashes and the final size are recomputed from the local copy.
    """
    try:
        host = ftputil.FTPHost(
            urlsplit(furl).hostname, "anonymous", "*****@*****.**")
    except ftputil.error.FTPOSError as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        return
    try:
        host.keep_alive()
        if not fdate:
            fdate = host.path.getmtime(urlsplit(furl).path)
        fsize = host.path.getsize(urlsplit(furl).path)
        needDownload, fname = determine_ftp_filename(host, furl)
        if needDownload:
            # fix: fsize was fetched a second time here; one lookup suffices
            print("Start download %s -> \"%s\" %d bytes" % (furl, fname, fsize))
            host.download(urlsplit(furl).path, dlDir + fname)
            print("Finished download \"%s\" -> %s %d bytes" % (furl, fname, fsize))
        else:
            print('Already downloaded %s' % (furl))
        md5 = getFileMd5(dlDir + fname)
        sha1 = getFileSha1(dlDir + fname)
        fsize = os.path.getsize(dlDir + fname)
        with open('zyxel_us_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, fver, fname, furl, fdate, fsize, sha1, md5])
    except TimeoutError as ex:
        # fix: message typo, was "Tomeout Error"
        print('Timeout Error ex=%s, furl=%s' % (ex, furl))
    except BaseException as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        traceback.print_exc()
    finally:
        host.close()
def main():
    """Re-read ca_dlink_filelist.csv, download every firmware file, and write
    ca_dlink_filelist2.csv augmented with size, sha1 and md5 columns.

    Rows that fail to download (URL error or malformed URL) are kept with
    size -1 and empty hash columns.
    """
    with open('ca_dlink_filelist.csv', 'r') as fin:
        reader = csv.reader(fin)
        next(reader)  # drop header row
        rows = [[m, r, v, u, d] for m, r, v, u, d in reader]
    for idx, (model, rev, fw_ver, ftp_url, date) in enumerate(rows):
        try:
            local_path = download_file(ftp_url)
            digest_sha1 = getFileSha1(local_path)
            digest_md5 = getFileMd5(local_path)
            nbytes = os.path.getsize(local_path)
            rows[idx] = [model, rev, fw_ver, ftp_url, date,
                         nbytes, digest_sha1, digest_md5]
        except urllib.error.URLError:
            print('Failed:', ftp_url)
            rows[idx] = [model, rev, fw_ver, ftp_url, date, -1, '', '']
        except ValueError:
            print('Failed wrong url "%s"' % ftp_url)
            rows[idx] = [model, rev, fw_ver, ftp_url, date, -1, '', '']
    with open('ca_dlink_filelist2.csv', 'w') as fout:
        writer = csv.writer(fout)
        writer.writerow([
            'model', 'rev', 'fw_ver', 'ftp_url', 'date', 'size', 'sha1', 'md5'
        ])
        writer.writerows(rows)
def epilog():
    """Append the current download's hashes and metadata to
    jp_dlink_filelist.csv.

    Relies on module-level state: localstor, fname, model, fw_ver,
    fw_url, rel_date and fsize — presumably set by the calling scrape
    loop (not visible here).
    """
    local_path = localstor + fname
    sha1 = getFileSha1(local_path)
    md5 = getFileMd5(local_path)
    with open('jp_dlink_filelist.csv', 'a') as fout:
        csv.writer(fout).writerow(
            [model, fw_ver, fw_url, rel_date, fsize, sha1, md5])
def download(ftpurl):
    """Fetch one file from files.dlink.com.au by FTP path and log it to
    au_dlink_ftp_filelist.csv.

    Skips Thumbs.db and pdf/txt files, and files already present locally
    with a matching size. All errors are printed and swallowed so a batch
    caller can keep going.
    """
    try:
        fname = ftpurl.split('/')[-1]
        if fname.lower() in ['thumbs.db']:
            return
        if fname.split('.')[-1].lower() in ['pdf', 'txt']:
            return
        with ftputil.FTPHost('files.dlink.com.au', 'anonymous', '') as host:
            fsize = host.path.getsize(ftpurl)
            fdate = host.path.getmtime(ftpurl)
            if os.path.isfile(localstor + fname) and os.path.getsize(localstor + fname) == fsize:
                print('%(fname)s already exists' % locals())
                return
            print('Start downloading %(ftpurl)s' % locals())
            host.download(ftpurl, localstor + fname)
            # fix: message typo, was "Finised downloading"
            print('Finished downloading %(ftpurl)s' % locals())
            file_sha1 = getFileSha1(localstor + fname)
            file_md5 = getFileMd5(localstor + fname)
            with open('au_dlink_ftp_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([ftpurl, fsize, fdate, file_sha1, file_md5])
    except Exception as ex:
        print(ex)
def download_file(model, fdesc, furl):  # noqa
    """Download the firmware at *furl* and append its metadata row to
    netgear_cn_filelist.csv.

    model: product model string.
    fdesc: description text the version number is extracted from.
    furl:  HTTP(S) download URL.
    URLs without a usable Content-Length are logged as "unknown" and skipped.
    """
    try:
        with closing(requests.get(url=furl, timeout=30, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            else:
                fsize = None
            if not fsize:
                # No usable size: record the URL with unknown hashes and bail.
                print('Unknown size resp.url=%s, headers=%s' %
                      (resp.url, resp.headers))
                with open('netgear_cn_filelist.csv', 'a') as fout:
                    cw = csv.writer(fout)
                    cw.writerow([
                        model, "", "", furl, None, fsize, "unknown", "unknown"
                    ])
                return
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            else:
                fdate = None
            try:
                # First dotted number run in the description, e.g. "1.0.2".
                fver = re.search(r'\d+(\.\d+)+', fdesc).group(0)
            except AttributeError:
                fver = ''
            needDownload, fname = determine_filename(resp)
            if not needDownload:
                print('Already downloaded: ', fname)
            else:
                print('Start downloading (%d bytes): %s' % (fsize, furl))
                # Writes a tiny placeholder under the final name before
                # streaming into a ".downloading" temp file, then swaps the
                # temp file in with os.replace. NOTE(review): presumably the
                # placeholder reserves the name for determine_filename's
                # collision check — confirm against that helper.
                with open(dlDir + fname, 'wb') as fout:
                    fout.write(b'place_holder0')
                with open(dlDir + fname + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                try:
                    os.replace(dlDir + fname + '.downloading', dlDir + fname)
                except BaseException as ex:
                    print(ex)
                    print('"%s" not found' % (dlDir + fname + '.downloading'))
                print('Finished downloading: %s' % furl)
            sha1 = getFileSha1(dlDir + fname)
            md5 = getFileMd5(dlDir + fname)
            if fsize and os.path.getsize(dlDir + fname) != fsize:
                # Header disagreed with what landed on disk; trust the file.
                print('Content-Length(%s) different to real fsize %s' %
                      (fsize, os.path.getsize(dlDir + fname)))
                fsize = os.path.getsize(dlDir + fname)
            with open('netgear_cn_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow(
                    [model, fver, fname, furl, fdate, fsize, sha1, md5])
    except requests.exceptions.ConnectionError:
        print('ConnectionError: %s' % furl)
    except requests.exceptions.ReadTimeout:
        print('ReadTimeout: %s' % furl)
    except BaseException as ex:
        traceback.print_exc()
def epilog(fsize, fdate):
    """Append one row for the current D-Link US download to
    us_dlink_filelist.csv.

    Hash columns are None when the file was never downloaded locally.
    Reads the module-level localstor, fname, model, rev, fw_ver and fw_url.
    """
    target = localstor + fname
    if os.path.isfile(target):
        sha1 = getFileSha1(target)
        md5 = getFileMd5(target)
    else:
        sha1 = None
        md5 = None
    with open('us_dlink_filelist.csv', 'a') as fout:
        csv.writer(fout).writerow(
            [model, rev, fw_ver, fw_url, fsize, fdate, sha1, md5])
    return
def download_file(model, fver, text, furl):  # noqa
    """Download one Tenda firmware file and append its row to
    tenda_filelist.csv.

    If a same-named file with a different size already exists locally, a
    fresh "name_N.ext" variant is chosen instead of overwriting it.
    """
    try:
        with closing(requests.get(url=furl, timeout=30, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fsize = int(resp.headers['Content-Length'])
            else:
                fsize = None
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            else:
                fdate = None
            fname = os.path.basename(urlsplit(furl).path)
            alreadyDownloaded = False
            if os.path.exists(dlDir + fname) and os.path.getsize(dlDir + fname) == fsize:
                alreadyDownloaded = True
            elif os.path.exists(
                    dlDir + fname) and os.path.getsize(dlDir + fname) != fsize:
                # rename until not os.path.exists(fname)
                while os.path.exists(dlDir + fname):
                    ftitle, fext = os.path.splitext(fname)
                    # fix: raw string — '\d' was an invalid escape sequence
                    m = re.search(r'(.+)_(\d+)', ftitle)
                    if m:
                        ftitle = m.group(1) + '_' + str(int(m.group(2)) + 1)
                        fname = ftitle + fext
                    else:
                        fname = ftitle + "_1" + fext
            if not alreadyDownloaded:
                # fix: %s instead of %d — fsize may be None when the server
                # omits Content-Length; %d raised TypeError and aborted the
                # download (ints render identically under %s).
                print('Start downloading %s -> "%s" %s bytes' %
                      (furl, fname, fsize))
                with open(dlDir + fname, 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                print('Finished downloading %s -> "%s" %s bytes' %
                      (furl, fname, fsize))
            else:
                print('Already downloaded %s' % furl)
            md5 = getFileMd5(dlDir + fname)
            sha1 = getFileSha1(dlDir + fname)
            fsize = os.path.getsize(dlDir + fname)
            with open('tenda_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow(
                    [model, fver, fname, furl, fdate, fsize, sha1, md5])
    except TimeoutError as ex:
        # fix: message typo, was "TomeoutError"
        print('TimeoutError ex=%s, furl=%s' % (ex, furl))
    except BaseException as ex:
        print('ex=%s, furl=%s' % (ex, furl))
        traceback.print_exc()
def download(session, url, model, filename, fw_ver, fdate):
    """Download one firmware file from tsd.dlink.com.tw and append its
    metadata row to tsd_dlink_filelist.csv.

    The GET may redirect; the file name and size are recovered — in rising
    priority — from the redirected URL's query string, then the response
    headers, falling back to the *filename* argument.
    """
    from urllib import parse
    fname = filename
    try:
        doccont = session.get(
            url=url,
            headers={'Referer': 'http://tsd.dlink.com.tw/downloads2008detailgo.asp',
                     'Upgrade-Insecure-Requests': '1'},
            stream=True,
            timeout=30)
        fw_url = doccont.url
        # print('fw_url=', fw_url)
        docParams = parse.parse_qs(parse.urlsplit(doccont.url).query)
        # print('docParams=', docParams)
        if 'fileName' in docParams:
            fname = docParams['fileName'][0]
        else:
            fname = os.path.basename(parse.urlsplit(fw_url).path)
        if 'fileSize' in docParams:
            fsize = int(float(docParams['fileSize'][0]))
            # print('fsize=', fsize)
        if 'Content-Length' in doccont.headers:
            fsize = int(doccont.headers['Content-Length'])
            # print('fsize=Content-Length=', fsize)
        if 'Content-Disposition' in doccont.headers:
            fname = doccont.headers['Content-Disposition'].split(';', 1)[1].split('=', 1)[1]
        # fsize is still unbound when neither the query string nor the
        # headers carried a size — hence the locals() membership test.
        if 'fsize' in locals():
            if os.path.isfile(localstor+fname) and os.path.getsize(localstor+fname)==fsize:
                # print('"%s" already exists'%(localstor+fname))
                return
        print('Start Downloading "%s" to "%s"' % (doccont.url, localstor+fname))
        with open(localstor + fname, 'wb') as fout:
            for chunk in doccont.iter_content(4096):
                fout.write(chunk)
        fsize = os.path.getsize(localstor + fname)
        print('Finisehd Downloading "%s" to "%s", fsize=%d' %
              (doccont.url, localstor+fname, fsize))
        sha1 = getFileSha1(localstor + fname)
        md5 = getFileMd5(localstor + fname)
        with open('tsd_dlink_filelist.csv', 'a') as fout:
            cw = csv.writer(fout)
            cw.writerow([model, '', fw_ver, fw_url, fdate, fsize, sha1, md5])
    except socket.timeout:
        print('socket timeout error, url=', url)
        return
    except requests.exceptions.Timeout as ex:
        print('requests timeoute error, url=', url)
        return
    except BaseException as ex:
        print('unknown error, url=', url)
        traceback.print_exc()
        print(ex)
        return
def download_file(model, desc, fw_url):
    """Download one NETGEAR firmware file and append its row to
    netgear_filelist.csv.

    desc: free-text description the version number is extracted from.
    Streams into a ".downloading" temp file, then renames it into place.
    """
    try:
        with closing(requests.get(url=fw_url, timeout=10, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fileSize = int(resp.headers['Content-Length'])
                print('fileSize=', fileSize)
            else:
                fileSize = None
            try:
                fw_ver = re.search(r'\d+(\.\d+)+', desc).group(0)
            except AttributeError:
                fw_ver = ''
            fileName = os.path.basename(urlsplit(fw_url).path)
            print('fileName=', fileName)
            if not fileName:
                print('No fileName:, url=', fw_url)
                return
            if 'Last-Modified' in resp.headers:
                fw_date = resp.headers['Last-Modified']
                fw_date = parse_date(fw_date)
            else:
                fw_date = None
            if os.path.isfile(dlDir+fileName) \
                    and fileSize == os.path.getsize(dlDir+fileName):
                print('already downloaded: ', fileName)
            else:
                print('start downloading: ', fw_url)
                with open(dlDir+fileName+'.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                try:
                    os.rename(dlDir+fileName+'.downloading', dlDir+fileName)
                except FileNotFoundError:
                    print('"%s" not found' % (dlDir+fileName+'.downloading'))
                print('finished downloading: ', fw_url)
            sha1 = getFileSha1(dlDir+fileName)
            md5 = getFileMd5(dlDir+fileName)
            if fileSize and os.path.getsize(dlDir+fileName) != fileSize:
                # Header disagreed with the on-disk size; trust the file.
                print('Content-Length(%s) different to real fileSize %s' %
                      (fileSize, os.path.getsize(dlDir+fileName)))
                fileSize = os.path.getsize(dlDir+fileName)
            with open('netgear_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, fw_ver, fileName, fw_url, fw_date,
                             fileSize, sha1, md5])
    except BaseException:
        # fix: removed leftover "import pdb; pdb.set_trace()" debugger hook
        traceback.print_exc()
def upsert_psql(file_name, fw_url, model, version, rel_date):
    """Insert one AVM firmware image row into the PostgreSQL `image` table.

    Size, MD5 and SHA1 are computed from the local file before insertion.
    """
    conn = None  # fix: connect() may raise before conn is bound,
    try:         # which made the finally clause itself raise NameError
        conn = psycopg2.connect(database="firmware", user="******",
                                password="******", host="127.0.0.1")
        cur = conn.cursor()
        brand_id = 1
        file_sha1 = getFileSha1(file_name)
        file_md5 = getFileMd5(file_name)
        file_size = os.path.getsize(file_name)
        cur.execute(
            "INSERT INTO image \
            (filename, brand, model, version, rel_date, brand_id, \
            file_size, hash, file_sha1, file_url) VALUES \
            ( %s, %s, %s, %s, %s, %s, \
            %s, %s, %s, %s)",
            (file_name, 'Avm', model, version, rel_date, brand_id,
             file_size, file_md5, file_sha1, fw_url))
        conn.commit()
    finally:
        if conn is not None:
            conn.close()
def main():
    """Rebuild dlink_ftp.dlink.eu_filelist.csv from the tab-separated text
    listing, adding model name, sha1 and md5 for each locally stored file."""
    with open('dlink_ftp.dlink.eu_filelist.csv', 'w') as fout:
        writer = csv.writer(fout, dialect='excel')
        writer.writerow([
            'ftp_url', 'file_size', 'file_date', 'model', 'file_sha1',
            'file_md5'
        ])
        with open('dlink_ftp.dlink.eu_filelist.txt', 'r') as fin:
            for raw_line in fin:
                entry = raw_line.strip()
                if not entry:
                    continue
                ftpurl, fsize, fdate = entry.split('\t', 2)
                fdate = datetime.fromtimestamp(float(fdate))
                fname = 'output/D-Link/ftp.dlink.eu/' + ftpurl.split('/')[-1]
                sha1 = getFileSha1(fname)
                md5 = getFileMd5(fname)
                # on-disk size supersedes the size column from the listing
                fsize = path.getsize(fname)
                model = get_model_from_ftp_url(ftpurl)
                writer.writerow([ftpurl, fsize, fdate, model, sha1, md5])
                print('%s,%s,%s,%s' % (ftpurl, fsize, fdate, model))
def download_file(hw_rev, model, fver, fdate, furl):  # noqa
    """Download one Linksys firmware image and append its metadata row to
    linksys_filelist.csv.

    The caller-supplied fdate is overridden by the Last-Modified header
    when present.
    """
    try:
        with closing(requests.get(url=furl, timeout=10, stream=True)) as resp:
            if 'Last-Modified' in resp.headers:
                fdate = resp.headers['Last-Modified']
                fdate = parse_date(fdate)
            if 'Content-Disposition' in resp.headers:
                # NOTE(review): this header-derived name is unconditionally
                # clobbered by the URL basename below — confirm which name
                # was intended before "fixing" further.
                fname = resp.headers['Content-Disposition']
                fname = fname.split(';')[-1].split('=')[-1].strip()
            fsize = None  # fix: fsize was unbound (NameError) whenever the
            if 'Content-Length' in resp.headers:  # Content-Length was absent
                fsize = int(resp.headers['Content-Length'])
            fname = os.path.basename(urlsplit(furl).path)
            if os.path.isfile(dlDir + fname) \
                    and fsize == os.path.getsize(dlDir + fname):
                print('already downloaded: ', fname)
            else:
                print('start downloading: ', furl)
                with open(dlDir + fname + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive new chunks
                            fout.write(chunk)
                try:
                    os.rename(dlDir + fname + '.downloading', dlDir + fname)
                except FileNotFoundError:
                    print('"%s" not found' % (dlDir + fname + '.downloading'))
                print('finished downloading: ', furl)
            sha1 = getFileSha1(dlDir + fname)
            md5 = getFileMd5(dlDir + fname)
            if fsize and os.path.getsize(dlDir + fname) != fsize:
                print('Content-Length(%s) different to real fsize %s' %
                      (fsize, os.path.getsize(dlDir+fname)))
                fsize = os.path.getsize(dlDir + fname)
            with open('linksys_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([model, hw_rev, fver, furl, fdate, fsize,
                             sha1, md5])
    except BaseException:
        # fix: removed leftover "import pdb; pdb.set_trace()" debugger hook
        traceback.print_exc()
def main():
    """Read au_dlink_filelist.csv, download every firmware (skipping PDFs),
    and write au_dlink_filelist2.csv augmented with size, hashes and the
    FTP modification date."""
    with open('au_dlink_filelist2.csv', 'w') as fout:
        writer = csv.writer(fout)
        writer.writerow([
            'model', 'rev', 'fw_ver', 'fw_url', 'file_size', 'file_sha1',
            'file_md5', 'file_date'
        ])
        with open('au_dlink_filelist.csv', 'r') as fin:
            reader = csv.reader(fin)
            next(reader)  # drop header row
            for model, rev, fw_ver, fw_url in reader:
                if fw_url.split('.')[-1].lower() in ['pdf']:
                    continue
                local_name = download_file(fw_url)
                digest_sha1 = getFileSha1(local_name)
                digest_md5 = getFileMd5(local_name)
                nbytes = os.path.getsize(local_name)
                fdate = get_ftp_date(fw_url)
                writer.writerow([
                    model, rev, fw_ver, fw_url, nbytes, digest_sha1,
                    digest_md5, fdate
                ])
def download_file(model, fileName, fw_url):
    """Download one NETGEAR firmware file and append its metadata row to
    netgear_filelist.csv.

    The incoming fileName only feeds the version-number regex; the file is
    stored under the basename of the URL path.
    """
    from contextlib import closing  # fix: the response was never closed;
    try:                            # sibling helpers already use closing()
        with closing(requests.get(url=fw_url, stream=True)) as resp:
            if 'Content-Length' in resp.headers:
                fileSize = int(resp.headers['Content-Length'])
                print('fileSize=', fileSize)
            else:
                fileSize = None
            try:
                fw_ver = re.search(r'\d+(\.\d+)+', fileName).group(0)
            except AttributeError:
                fw_ver = ''
            fileName = os.path.basename(urllib.parse.urlsplit(fw_url).path)
            print('fileName=', fileName)
            if 'Last-Modified' in resp.headers:
                fw_date = resp.headers['Last-Modified']
                fw_date = parse_date(fw_date)
            else:
                fw_date = None
            if os.path.isfile(dlDir+fileName) \
                    and fileSize == os.path.getsize(dlDir+fileName):
                print('already downloaded: ', fileName)
            else:
                print('start downloading: ', fw_url)
                with open(dlDir + fileName + '.downloading', 'wb') as fout:
                    for chunk in resp.iter_content(8192):
                        fout.write(chunk)
                os.rename(dlDir + fileName + '.downloading', dlDir + fileName)
                print('finished downloading: ', fw_url)
            sha1 = getFileSha1(dlDir + fileName)
            md5 = getFileMd5(dlDir + fileName)
            fileSize = os.path.getsize(dlDir + fileName)
            with open('netgear_filelist.csv', 'a') as fout:
                cw = csv.writer(fout)
                cw.writerow([
                    model, fw_ver, fileName, fw_url, fw_date, fileSize,
                    sha1, md5
                ])
    except BaseException:
        traceback.print_exc()
def main():
    """Verify every row of netgear_filelist.csv against the files on disk
    and write netgear_filelist2.csv with corrected size/sha1/md5 columns,
    reporting each mismatch found."""
    dlDir = './output/netgear/downloadcenter.netgear.com_form_submit/'
    with open('netgear_filelist.csv', 'r') as fin:
        reader = csv.reader(fin)
        next(reader)  # drop header row
        records = [(model, fver, fname, furl, parse_date(fdate), int(fsize),
                    sha1, md5)
                   for model, fver, fname, furl, fdate, fsize, sha1, md5
                   in reader]
    with open('netgear_filelist2.csv', 'w') as fout:
        writer = csv.writer(fout)
        writer.writerow(['model', 'fw_ver', 'fileName', 'fw_url', 'fw_data',
                         'fileSize', 'sha1', 'md5'])
        for model, fver, fname, furl, fdate, fsize, sha1, md5 in records:
            actual_size = os.path.getsize(dlDir + fname)
            actual_sha1 = getFileSha1(dlDir + fname)
            actual_md5 = getFileMd5(dlDir + fname)
            if actual_size != fsize:
                print('"%s" wrong fileSize(%s), correct= %s'
                      % (fname, fsize, actual_size))
            elif actual_sha1 != sha1:
                print('"%s" wrong sha1(%s), correct= %s'
                      % (fname, sha1, actual_sha1))
            elif actual_md5 != md5:
                print('"%s" wrong md5(%s), correct= %s'
                      % (fname, md5, actual_md5))
            writer.writerow([model, fver, fname, furl, fdate, actual_size,
                             actual_sha1, actual_md5])
def upsert_psql(file_name, fw_url, model, version, rel_date):
    """Insert one AVM firmware image row into the PostgreSQL `image` table.

    Size, MD5 and SHA1 are computed from the local file before insertion.
    """
    conn = None  # fix: connect() may raise before conn is bound,
    try:         # which made the finally clause itself raise NameError
        conn = psycopg2.connect(database="firmware", user="******",
                                password="******", host="127.0.0.1")
        cur = conn.cursor()
        brand_id = 1
        file_sha1 = getFileSha1(file_name)
        file_md5 = getFileMd5(file_name)
        file_size = os.path.getsize(file_name)
        cur.execute(
            "INSERT INTO image \
            (filename, brand, model, version, rel_date, brand_id, \
            file_size, hash, file_sha1, file_url) VALUES \
            ( %s, %s, %s, %s, %s, %s, \
            %s, %s, %s, %s)",
            (file_name, 'Avm', model, version, rel_date, brand_id,
             file_size, file_md5, file_sha1, fw_url))
        conn.commit()
    finally:
        if conn is not None:
            conn.close()
def parse_download_page(page_url):
    """Scrape one Sitecom product download page, fetch every firmware file
    listed on it, and insert a row per file into the PostgreSQL image table.

    Stops the whole page on ENOSPC (errno 28); skips individual items on
    other download errors.
    """
    global prevTrail
    d = pq(url=page_url)
    trailStr = ''
    try:
        # Existence probe only: missing h1.product-name means a dead page.
        d('h1.product-name')[0].text_content().strip()
    except IndexError:
        print('%s does NOT exist' % page_url)
        return
    model = d('h2.product-number')[0].text_content().strip()
    for idx, item in enumerate(d('li.download-item'), start=getStartIdx()):
        try:
            title = item.cssselect('h3.thin')[0].text_content()
        except IndexError:
            continue
        if 'firmware' not in title.lower():
            continue
        rel_date = item.cssselect('small')[0].text_content()
        # 'Publication date \r: 18 January 2016'
        rel_date = rel_date.split('\r')[1].strip(': ')
        # '18 January 2016'
        rel_date = datetime.strptime(rel_date, '%d %B %Y')
        fw_ver = item.cssselect('.download-text-title')[0].text_content()
        # 'Version number\r: 2.11'
        fw_ver = fw_ver.split('\r')[-1].strip(': ')
        # '2.11'
        fw_ver = re.search(r'\d+(\.\d+)*', fw_ver).group(0)
        fw_desc = d('.download-item div')[0].text_content().strip()
        # 'Changed:\n\n\n\tAdd timeout to check DNS alive\n\tAdd procedure to
        # verify ipv4 and ipv6 on ppp session"
        fw_url = item.cssselect('a')[0].attrib['href']
        try:
            uprint('start to download %s' % fw_url)
            local_file_path = cookie_friendly_download(page_url, fw_url,
                                                       store_dir,
                                                       timeout=1000)
        except urllib.error.HTTPError as ex:
            # fix: `ex` was never bound here, so print(ex) raised NameError
            print(ex)
            continue
        except OSError as ex:
            if ex.errno == 28:
                print(ex)
                print('[Errno 28] No space left on device')
                break
            # fix: other OSErrors previously fell through and then hit an
            # unbound local_file_path below
            traceback.print_exc()
            continue
        except Exception:
            ipdb.set_trace()  # NOTE(review): interactive debugger left in
            traceback.print_exc()
            continue
        file_sha1 = getFileSha1(local_file_path)
        file_md5 = getFileMd5(local_file_path)
        file_size = path.getsize(local_file_path)
        uprint('file_path="%s", file_size=%s, file_sha1=%s' %
               (local_file_path, file_size, file_sha1))
        trailStr = str(prevTrail + [idx])
        psql(
            "INSERT INTO image"
            "(brand, model,"
            " rel_date, version, description,"
            " filename, file_sha1, hash, file_size,"
            " page_url, file_url, tree_trail) VALUES"
            "( %s, %s, "
            " %s, %s, %s,"
            " %s, %s, %s, %s,"
            " %s, %s, %s)",
            ('Sitecom', model, rel_date, fw_ver, fw_desc, local_file_path,
             file_sha1, file_md5, file_size, page_url, fw_url, trailStr))
# One-off migration script: copy the scraped Sitecom firmware records from
# the local sqlite3 database into the central PostgreSQL "firmware" database,
# computing each file's MD5 from the local copy on the way.
import psycopg2
import sqlite3
from web_utils import getFileMd5

# Pull every record out of sqlite, then release the database immediately.
db1 = sqlite3.connect('sitecom.sqlite3')
cur = db1.cursor()
cur.execute("SELECT brand, model, fw_date, fw_ver, fw_desc,\
    file_name, file_sha1, file_size, \
    page_url, file_url FROM TFiles")
rows = cur.fetchall()
db1.close()

db2 = psycopg2.connect(database='firmware', user='******',
                       password='******', host='127.0.0.1')
cur = db2.cursor()
for row in rows:
    brand, model, rel_date, fw_ver, fw_desc, \
        file_name, file_sha1, file_size, \
        page_url, fw_url = row
    # MD5 was not stored in sqlite; derive it from the file on disk.
    fw_md5 = getFileMd5(file_name)
    cur.execute(
        "INSERT INTO image (filename,description,hash,\
        brand,model,version,file_url,rel_date,page_url,\
        file_sha1, file_size)VALUES(%s, %s, %s,\
        %s, %s, %s, %s, %s, %s,\
        %s, %s)",
        (file_name, fw_desc, fw_md5, brand, model, fw_ver, fw_url,
         rel_date, page_url, file_sha1, file_size))
# Single commit after the loop: all rows land atomically.
db2.commit()
print('Merge "%s" "%s"'%(model, fw_ver)) solid_file_size, solid_date,file_sha1, file_md5 = rows[solid_index][3:7] assert file_sha1 if file_date is None: file_date = solid_date rows[solid_index] = model, fw_ver, ftpurl, solid_file_size, file_date, file_sha1, file_md5 del rows[index] except StopIteration: print('Download %s'%ftpurl) try: fname = download_file(ftpurl) except urllib.error.URLError: print('Failed to download ', ftpurl) continue file_sha1 = getFileSha1(fname) file_md5 = getFileMd5(fname) file_size = os.path.getsize(fname) rows[index][3]=file_size if rows[index][4] is None: get_ftp_date(ftpurl) rows[index][5]=file_sha1 rows[index][6]=file_md5 rows.sort(key=lambda r:(r[0].lower(),r[1].lower(),r[2].lower())) with open('dlink_ftp.dlink.eu_filelist3.csv', 'w') as fout: cw = csv.writer(fout) cw.writerow(['model','fw_ver','fw_url','size','date','sha1','md5']) cw.writerows(rows) except Exception as ex: traceback.print_exc()