Example 1
import ftplib
import os
import socket

import hatanaka


def ftp_download_files(url_base, folder_path, cacheDir, filenames, compression='', overwrite=False):
  """
  Like download_file, but for multiple files. Keeps a persistent FTP
  connection open to be more efficient.
  """
  folder_path_abs = os.path.join(cacheDir, folder_path)

  # ftp_connect is a helper defined elsewhere in this module.
  ftp = ftp_connect(url_base + folder_path)

  filepaths = []
  for filename in filenames:
    filename_zipped = filename + compression
    filepath = str(hatanaka.get_decompressed_path(os.path.join(folder_path_abs, filename)))
    filepath_zipped = os.path.join(folder_path_abs, filename_zipped)
    print("pulling from", url_base, "to", filepath)

    if not os.path.isfile(filepath) or overwrite:
      os.makedirs(folder_path_abs, exist_ok=True)
      try:
        # Stream the compressed file to disk; the context manager closes
        # the handle even if the transfer fails.
        with open(filepath_zipped, 'wb') as wf:
          ftp.retrbinary('RETR ' + filename_zipped, wf.write)
      except ftplib.error_perm:
        raise IOError("Could not download file from: " + url_base + folder_path + filename_zipped)
      except socket.timeout:
        raise IOError("Read timed out from: " + url_base + folder_path + filename_zipped)
      filepaths.append(str(hatanaka.decompress_on_disk(filepath_zipped)))
    else:
      filepaths.append(filepath)
  return filepaths
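
A minimal usage sketch for ftp_download_files, assuming a reachable FTP archive; the host name, server paths, cache directory, and filenames below are placeholders, not values from the original project.

# Fetch two gzip-compressed RINEX files into a local cache and get back
# the decompressed local paths.
paths = ftp_download_files(
  'ftp://example-archive.org/',       # url_base (placeholder host)
  'gnss/data/daily/2021/001/',        # folder_path on the server
  '/tmp/gnss_cache/',                 # cacheDir on the local machine
  ['site0010.21d', 'site0020.21d'],   # filenames to fetch
  compression='.gz',                  # server-side compression suffix
)
print(paths)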
Example 2
import os
import socket
import tempfile
import time

import hatanaka
import pycurl

# SECS_IN_HR and download_file are defined elsewhere in the surrounding module.


def download_and_cache_file(url_base, folder_path, cacheDir, filename, compression='', overwrite=False):
  """
  Download a file into the local cache, decompress it, and return the
  decompressed path. A failed attempt is recorded in an '.attempt_time'
  marker file so the download is not retried more than once per hour.
  """
  folder_path_abs = os.path.join(cacheDir, folder_path)
  filename_zipped = filename + compression

  filepath = str(hatanaka.get_decompressed_path(os.path.join(folder_path_abs, filename)))
  filepath_attempt = filepath + '.attempt_time'
  filepath_zipped = os.path.join(folder_path_abs, filename_zipped)

  # Back off if a recent attempt already failed.
  if os.path.exists(filepath_attempt):
    with open(filepath_attempt, 'rb') as rf:
      last_attempt_time = float(rf.read().decode())
    if time.time() - last_attempt_time < SECS_IN_HR:
      raise IOError(f"Too soon to try {folder_path + filename_zipped} from {url_base}")

  if not os.path.isfile(filepath) or overwrite:
    os.makedirs(folder_path_abs, exist_ok=True)

    try:
      data_zipped = download_file(url_base, folder_path, filename_zipped)
    except (IOError, pycurl.error, socket.timeout):
      # Record the failed attempt atomically: write the timestamp to a
      # temporary file, then rename it over the marker path.
      unix_time = time.time()
      tmp_dir = os.path.join(cacheDir, 'tmp')
      os.makedirs(tmp_dir, exist_ok=True)
      with tempfile.NamedTemporaryFile(delete=False, dir=tmp_dir) as fout:
        fout.write(str.encode(str(unix_time)))
      os.replace(fout.name, filepath_attempt)
      raise IOError(f"Could not download {folder_path + filename_zipped} from {url_base}")

    with open(filepath_zipped, 'wb') as wf:
      wf.write(data_zipped)

    filepath = str(hatanaka.decompress_on_disk(filepath_zipped))
  return filepath
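
A minimal usage sketch, assuming the module-level download_file helper and SECS_IN_HR constant are in scope; the host, server path, and filename are placeholders. Note the temporary-file-plus-os.replace pattern above: it makes the marker write atomic, so a crash mid-write cannot leave a corrupt timestamp behind.

# Fetch one compressed file into the cache. If the server is unreachable,
# a second call within an hour fails fast with "Too soon to try ..."
# instead of hitting the network again.
try:
  local_path = download_and_cache_file(
    'https://example-archive.org/',  # url_base (placeholder host)
    'gnss/products/2141/',           # folder_path on the server
    '/tmp/gnss_cache/',              # cacheDir
    'igu21410_00.sp3',               # filename (illustrative)
    compression='.Z',
  )
  print('cached at', local_path)
except IOError as e:
  print('download skipped or failed:', e)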
Example 3
import pytest

from hatanaka import (compress_on_disk, decompress_on_disk,
                      get_compressed_path, get_decompressed_path)


def test_on_disk_empty_input(tmp_path, crx_str, rnx_bytes):
    # crx_str and rnx_bytes are fixtures supplied by the surrounding test
    # suite; they are not used in this particular test.
    # Decompressing an empty .crx file must raise and leave no output file.
    path = tmp_path / 'sample.crx'
    path.write_bytes(b'')
    with pytest.raises(ValueError) as excinfo:
        decompress_on_disk(path)
    assert "empty file" in str(excinfo.value)
    assert not get_decompressed_path(path).exists()
    path.unlink()

    # Compressing an empty .rnx file must likewise raise and leave no output.
    path = tmp_path / 'sample.rnx'
    path.write_bytes(b'')
    with pytest.raises(ValueError) as excinfo:
        compress_on_disk(path)
    assert "file is too short" in str(excinfo.value)
    assert not get_compressed_path(path, is_obs=True).exists()
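
For contrast with the error cases above, a minimal happy-path sketch of the same hatanaka on-disk API; 'site0010.21o' is a hypothetical RINEX 2 observation file assumed to already exist on disk.

from hatanaka import compress_on_disk, decompress_on_disk

# compress_on_disk writes the Hatanaka-compressed file next to the input
# and returns its path; decompress_on_disk reverses the operation.
crx_path = compress_on_disk('site0010.21o')
rnx_path = decompress_on_disk(crx_path)
print(crx_path, '->', rnx_path)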