def ftp_download_files(url_base, folder_path, cacheDir, filenames, compression='', overwrite=False):
    """Download multiple files over one persistent FTP connection.

    Like download_file, but for a batch of files: a single FTP connection
    is opened for the whole batch to avoid per-file reconnect overhead.

    Args:
        url_base: FTP server base URL (passed to ftp_connect).
        folder_path: remote folder, also the cache subfolder name.
        cacheDir: local cache root; files land in cacheDir/folder_path.
        filenames: iterable of file names (without the compression suffix).
        compression: suffix appended to each remote name (e.g. '.gz').
        overwrite: if True, re-download even when a decompressed copy exists.

    Returns:
        List of local paths to the decompressed files.

    Raises:
        IOError: if a file cannot be retrieved or the read times out.
    """
    folder_path_abs = os.path.join(cacheDir, folder_path)
    ftp = ftp_connect(url_base + folder_path)
    filepaths = []
    try:
        for filename in filenames:
            filename_zipped = filename + compression
            filepath = str(hatanaka.get_decompressed_path(os.path.join(folder_path_abs, filename)))
            filepath_zipped = os.path.join(folder_path_abs, filename_zipped)
            print("pulling from", url_base, "to", filepath)
            if not os.path.isfile(filepath) or overwrite:
                # exist_ok avoids the check-then-create race of the original.
                os.makedirs(folder_path_abs, exist_ok=True)
                try:
                    # Context manager: the original passed open(...).write
                    # directly to retrbinary and leaked the file handle.
                    with open(filepath_zipped, 'wb') as wf:
                        ftp.retrbinary('RETR ' + filename_zipped, wf.write)
                except ftplib.error_perm:
                    raise IOError("Could not download file from: " + url_base + folder_path + filename_zipped)
                except socket.timeout:
                    raise IOError("Read timed out from: " + url_base + folder_path + filename_zipped)
                filepaths.append(str(hatanaka.decompress_on_disk(filepath_zipped)))
            else:
                filepaths.append(filepath)
    finally:
        # Always release the persistent connection, even on error
        # (the original never closed it).
        try:
            ftp.quit()
        except ftplib.all_errors:
            pass
    return filepaths
def download_and_cache_file(url_base, folder_path, cacheDir, filename, compression='', overwrite=False):
    """Download a single file into the local cache, with failure back-off.

    If a previous attempt failed less than SECS_IN_HR ago (recorded in a
    '<filepath>.attempt_time' marker file), the download is not retried.
    On a failed download the marker is (re)written atomically via a temp
    file + os.replace before re-raising.

    Args:
        url_base: server base URL (passed to download_file).
        folder_path: remote folder, also the cache subfolder name.
        cacheDir: local cache root; the file lands in cacheDir/folder_path.
        filename: file name without the compression suffix.
        compression: suffix appended to the remote name (e.g. '.gz').
        overwrite: if True, re-download even when a decompressed copy exists.

    Returns:
        Local path to the decompressed file.

    Raises:
        IOError: if the back-off window has not elapsed, or the download fails.
    """
    folder_path_abs = os.path.join(cacheDir, folder_path)
    filename_zipped = filename + compression
    filepath = str(hatanaka.get_decompressed_path(os.path.join(folder_path_abs, filename)))
    filepath_attempt = filepath + '.attempt_time'
    filepath_zipped = os.path.join(folder_path_abs, filename_zipped)
    if os.path.exists(filepath_attempt):
        with open(filepath_attempt, 'rb') as rf:
            last_attempt_time = float(rf.read().decode())
        if time.time() - last_attempt_time < SECS_IN_HR:
            raise IOError(f"Too soon to try {folder_path + filename_zipped} from {url_base} ")
    if not os.path.isfile(filepath) or overwrite:
        # exist_ok avoids the check-then-create race of the original.
        os.makedirs(folder_path_abs, exist_ok=True)
        try:
            data_zipped = download_file(url_base, folder_path, filename_zipped)
        except (IOError, pycurl.error, socket.timeout):
            unix_time = time.time()
            # Bug fix: the original checked cacheDir + 'tmp/' but created
            # cacheDir + '/tmp' — two different paths unless cacheDir ends
            # with '/', so NamedTemporaryFile could fail on a missing dir.
            tmp_dir = os.path.join(cacheDir, 'tmp')
            os.makedirs(tmp_dir, exist_ok=True)
            # Write the attempt timestamp atomically: temp file, then replace.
            with tempfile.NamedTemporaryFile(delete=False, dir=tmp_dir) as fout:
                fout.write(str.encode(str(unix_time)))
            os.replace(fout.name, filepath_attempt)
            raise IOError(f"Could not download {folder_path + filename_zipped} from {url_base} ")
        with open(filepath_zipped, 'wb') as wf:
            wf.write(data_zipped)
        filepath = str(hatanaka.decompress_on_disk(filepath_zipped))
    return filepath
def test_on_disk_empty_input(tmp_path, crx_str, rnx_bytes):
    """An empty input file raises ValueError and produces no output file."""
    crx_path = tmp_path / 'sample.crx'
    crx_path.write_bytes(b'')
    with pytest.raises(ValueError) as excinfo:
        decompress_on_disk(crx_path)
    assert "empty file" in str(excinfo.value)
    assert not get_decompressed_path(crx_path).exists()
    crx_path.unlink()

    rnx_path = tmp_path / 'sample.rnx'
    rnx_path.write_bytes(b'')
    with pytest.raises(ValueError) as excinfo:
        compress_on_disk(rnx_path)
    assert "file is too short" in str(excinfo.value)
    assert not get_compressed_path(rnx_path, is_obs=True).exists()