def download_unzip(url, extract_path):
    """Download the zip archive at *url* and extract it into *extract_path*.

    The archive is cached as ``<top_path>/data/downloads/<last URL segment>``
    while it is being processed and is deleted afterwards, whether or not the
    extraction succeeded.

    Args:
        url: Address of the zip archive; the text after the last ``/`` is
            used as the local file name.
        extract_path: Directory to create and extract into.

    Returns:
        True when *extract_path* already exists as a directory (nothing is
        downloaded).  None otherwise -- both after a failed download and
        after a successful extraction, so callers cannot tell those two
        apart from the return value alone.

    Raises:
        Exception: whatever creating *extract_path* or extracting the archive
            raised; the partially populated *extract_path* is removed first.
    """
    if os.path.isdir(extract_path):
        return True

    download_path = os.path.join(top_path, 'data', 'downloads')
    if not os.path.isdir(download_path):
        # makedirs, not mkdir: the intermediate 'data' directory may be
        # missing as well.
        os.makedirs(download_path)

    fn = url.split("/")[-1]
    dfn = os.path.join(download_path, fn)
    if not download_file(url, dfn):
        xlog.warn("download file %s fail.", url)
        return

    try:
        os.mkdir(extract_path)
        # The context manager closes the archive; no explicit close() needed.
        with zipfile.ZipFile(dfn, "r") as dz:
            dz.extractall(extract_path)
        xlog.info("Extract %s to %s success.", fn, extract_path)
    except Exception as e:
        xlog.warn("unzip %s fail:%r", dfn, e)
        # ignore_errors: extract_path may not exist if os.mkdir itself
        # failed, and a cleanup error must not mask the original exception.
        shutil.rmtree(extract_path, ignore_errors=True)
        # Drop the archive too.  download_file() treats an existing file as
        # already downloaded, so leaving a corrupt archive behind would make
        # every later attempt fail on the same bad file.
        if os.path.isfile(dfn):
            try:
                os.remove(dfn)
            except OSError as remove_err:
                xlog.warn("remove %s fail:%r", dfn, remove_err)
        raise

    os.remove(dfn)
def _discard_download(path):
    """Delete a partial or invalid download at *path*, logging any failure."""
    try:
        if os.path.isfile(path):
            os.remove(path)
    except OSError as e:
        xlog.warn("remove %s fail:%r", path, e)


def _save_chunked_body(req, filename, timeout):
    """Write a chunked response body to *filename* within *timeout* seconds."""
    start_time = time.time()
    with open(filename, 'wb') as fp:
        while True:
            time_left = timeout - (time.time() - start_time)
            if time_left < 0:
                raise Exception("time out")

            dat = req.read(timeout=time_left)
            if not dat:
                break
            fp.write(dat)


def _save_sized_body(req, filename, file_size):
    """Write up to *file_size* body bytes to *filename*; return bytes written."""
    left = file_size
    downloaded = 0
    with open(filename, 'wb') as fp:
        while left:
            chunk = req.read(min(65536, left))
            if not chunk:
                break
            fp.write(chunk)
            downloaded += len(chunk)
            left -= len(chunk)
    return downloaded


def download_file(url, filename, sha256=None):
    """Download *url* to *filename*, making up to four attempts.

    An already existing *filename* is treated as a finished download and is
    neither re-fetched nor re-verified.  A file that was only partially
    written, has the wrong size, or fails the checksum is deleted, so that a
    later call does not mistake it for a finished download.

    Args:
        url: Address to fetch.  Redirect responses are followed; each
            redirect uses up one of the four attempts.
        filename: Local path to write the response body to.
        sha256: Optional expected digest, compared against
            ``hash_file_sum(filename)`` once the body has been saved.

    Returns:
        True if *filename* already existed or was downloaded (and verified,
        when *sha256* is given); False otherwise.
    """
    org_url = url
    if os.path.isfile(filename):
        return True

    for i in range(4):
        try:
            xlog.info("download %s to %s, retry:%d", url, filename, i)
            req = request(url, i, timeout=120)
            if not req:
                time.sleep(60)
                continue

            if req.status in (301, 302, 303, 307, 308):
                url = req.headers[b"Location"]
                continue

            if req.status != 200:
                # Do not store an error page as if it were the payload.
                xlog.warn("download %s fail, status:%r", url, req.status)
                continue

            if req.chunked:
                _save_chunked_body(req, filename, timeout=300)
            else:
                # NOTE(review): a response without Content-Length yields
                # file_size == 0 and therefore an empty file reported as
                # success -- confirm whether that case can occur.
                file_size = int(req.getheader(b'Content-Length', 0))
                downloaded = _save_sized_body(req, filename, file_size)
                if downloaded != file_size:
                    xlog.warn("download size:%d, need size:%d, download fail.", downloaded, file_size)
                    _discard_download(filename)
                    continue

            # Verify both transfer modes; chunked bodies have no declared
            # size, so the checksum is the only integrity check for them.
            if sha256 and sha256 != hash_file_sum(filename):
                xlog.warn("donwload %s checksum fail.", filename)
                _discard_download(filename)
                return False

            xlog.info("download %s to %s success.", org_url, filename)
            return True
        except Exception as e:
            xlog.warn("download %s to %s fail:%r", org_url, filename, e)
            # Whatever was written before the error is incomplete.
            _discard_download(filename)
            continue

    xlog.warn("download %s fail", org_url)
    return False