def test_get_and_create_sample_dir():
    """
    Check that the sample-data directory honours both the default config
    and a user-supplied ``downloads/sample_dir`` override.
    """
    # test default config
    path = get_and_create_sample_dir()
    assert path == os.path.join(USER, 'sunpy', 'data', 'sample_data')
    # test updated config
    new_path = os.path.join(USER, 'data_here_please')
    try:
        config.set('downloads', 'sample_dir', new_path)
        path = get_and_create_sample_dir()
        assert path == new_path
    finally:
        # BUG FIX: the original never reset the config, leaking the modified
        # 'sample_dir' into every later test.  Restore the default here even
        # if the assertion above fails.
        config.set('downloads', 'sample_dir',
                   os.path.join(USER, 'sunpy', 'data', 'sample_data'))
def test_get_and_create_sample_dir():
    """
    Check that the sample-data directory honours both the default config
    (platform data dir) and a user-supplied ``downloads/sample_dir`` override.
    """
    # test default config
    path = get_and_create_sample_dir()
    assert Path(path) == Path(dirs.user_data_dir)
    # test updated config
    new_path = os.path.join(USER, 'sample_data_here_please')
    try:
        config.set('downloads', 'sample_dir', new_path)
        path = get_and_create_sample_dir()
        assert path == new_path
    finally:
        # BUG FIX: the original ran this cleanup only on success, so a failed
        # assert left the config pointing at the temporary directory (and the
        # directory itself on disk) for every later test.
        if os.path.isdir(new_path):
            os.rmdir(new_path)
        # Set the config back
        config.set('downloads', 'sample_dir',
                   os.path.join(USER, 'sunpy', 'data', 'sample_data'))
def download_sample_data(overwrite=False):
    """
    Download all sample data at once. This will overwrite any existing files.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option
    env_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if env_dir:
        sampledata_dir = Path(env_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    # Partition the sample files into those already on disk and those that
    # still need fetching; we avoid calling download when nothing is missing.
    already_downloaded, to_download = [], []
    for remote_name in _SAMPLE_FILES:
        local_path = sampledata_dir / remote_name
        if local_path.exists() and not overwrite:
            already_downloaded.append(local_path)
        else:
            # URL and Filename pairs
            to_download.append((remote_name, local_path))

    if not to_download:
        return already_downloaded
    results = _download_sample_data(_BASE_URLS[0], to_download, overwrite=overwrite)
    # Something went wrong.
    if results.errors:
        results = _retry_sample_data(results)
    return results + already_downloaded
def test_print_config_files(tmpdir, tmp_path, undo_download_dir_patch):
    """
    Capture the output of ``print_config`` and check it mentions the time
    format, the first config file, and both download/sample directories.
    """
    buffer = io.StringIO()
    with redirect_stdout(buffer):
        print_config()
    output = buffer.getvalue()
    assert "time_format = %Y-%m-%d %H:%M:%S" in output
    assert _find_config_files()[0] in output
    assert get_and_create_download_dir() in output
    assert get_and_create_sample_dir() in output
def download_sample_data(overwrite=False):
    """
    Download all sample data at once. This will overwrite any existing files.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option
    env_override = os.environ.get("SUNPY_SAMPLEDIR", False)
    if env_override:
        sampledata_dir = Path(env_override).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    downloader = parfive.Downloader(overwrite=overwrite)
    primary_url = _base_urls[0]
    already_downloaded = []
    for name in _sample_files.keys():
        target = sampledata_dir / name
        # We have to avoid calling download if we already have all the files.
        if target.exists() and not overwrite:
            already_downloaded.append(target)
        else:
            downloader.enqueue_file(urljoin(primary_url, name),
                                    filename=sampledata_dir / name)

    if not downloader.queued_downloads:
        return already_downloaded
    results = downloader.download()
    if not results.errors:
        return results

    # Walk the remaining mirrors, re-pointing each failed download at the
    # mirror and retrying until everything succeeds or mirrors run out.
    for mirror in _base_urls[1:]:
        for idx, err in enumerate(results.errors):
            failed_name = Path(err.url).name
            # Overwrite the parfive error to change the url to a mirror
            results._errors[idx] = _error(err.filepath_partial,
                                          urljoin(mirror, failed_name),
                                          err.exception)
        results = downloader.retry(results)
        if not results.errors:
            return results

    for err in results.errors:
        warnings.warn(f"File {Path(err.url).name} not found.", SunpyUserWarning)
    return results
def download_sample_data(progress=True, overwrite=True, timeout=None):
    """
    Download the sample data.

    Parameters
    ----------
    progress: `bool`
        Show a progress bar during download
    overwrite: `bool`
        If exist overwrites the downloaded sample data.
    timeout: `float`
        The timeout in seconds. If `None` the default timeout is used from
        `astropy.utils.data.Conf.remote_timeout`.

    Returns
    -------
    None

    Raises
    ------
    `urllib.error.URLError`
        If one or more files could not be fetched from any mirror.
    """
    # NOTE(review): ``progress`` and ``timeout`` are accepted but never used
    # in this body — confirm whether download_file should receive them.
    # Creating the directory for sample files to be downloaded
    sampledata_dir = get_and_create_sample_dir()
    number_of_files_fetched = 0
    print("Downloading sample files to {}".format(sampledata_dir))
    for file_name in six.itervalues(_files):
        if not overwrite:
            # Skip files already present locally; count them as fetched.
            if os.path.isfile(os.path.join(sampledata_dir, file_name[0])):
                number_of_files_fetched += 1
                continue
        # Try each mirror in turn until the file is obtained.
        for base_url in _base_urls:
            full_file_name = file_name[0] + file_name[1]
            try:
                exists = url_exists(os.path.join(base_url, full_file_name))
                if exists:
                    f = download_file(os.path.join(base_url, full_file_name))
                    real_name, ext = os.path.splitext(full_file_name)
                    if file_name[1] == '.zip':
                        print("Unpacking: {}".format(real_name))
                        with ZipFile(f, 'r') as zip_file:
                            zip_file.extract(real_name, sampledata_dir)
                        os.remove(f)
                    else:
                        # move files to the data directory
                        move(f, os.path.join(sampledata_dir, file_name[0]))
                    # increment the number of files obtained to check later
                    number_of_files_fetched += 1
                    break
            except (socket.error, socket.timeout) as e:
                warnings.warn("Download failed with error {}. \n"
                              "Retrying with different mirror.".format(e))
    if number_of_files_fetched < len(list(_files.keys())):
        # BUG FIX: the two message halves previously concatenated without a
        # separating space, producing "...samples files.Problem with...".
        raise URLError("Could not download all samples files. "
                       "Problem with accessing sample data servers.")
def download_sample_data(overwrite=False):
    """
    Download all sample data at once. This will overwrite any existing files.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.
    """
    # Creating the directory for sample files to be downloaded
    sample_dir = Path(get_and_create_sample_dir())
    dlm = parfive.Downloader(overwrite=overwrite)
    primary = _base_urls[0]
    cached = []
    for name in _sample_files.keys():
        destination = sample_dir / name
        # We have to avoid calling download if we already have all the files.
        if destination.exists() and not overwrite:
            cached.append(destination)
        else:
            dlm.enqueue_file(urljoin(primary, name), filename=sample_dir / name)

    if not dlm.queued_downloads:
        return cached
    results = dlm.download()
    if not results.errors:
        return results

    # Retry every failure against each mirror in turn.
    for mirror in _base_urls[1:]:
        for i, err in enumerate(results.errors):
            # Overwrite the parfive error to change the url to a mirror
            mirrored = urljoin(mirror, Path(err.url).name)
            results._errors[i] = _error(err.filepath_partial, mirrored,
                                        err.exception)
        results = dlm.retry(results)
        if not results.errors:
            return results

    for err in results.errors:
        warnings.warn(f"File {Path(err.url).name} not found.", SunpyUserWarning)
    return results
def test_print_config_files(undo_download_dir_patch):
    """
    Capture ``print_config`` output and check it reports the time format,
    the first config file, and the download/sample directories.
    """
    stdout = sys.stdout
    out = io.StringIO()
    sys.stdout = out
    try:
        print_config()
    finally:
        # BUG FIX: restore stdout even if print_config() raises; the original
        # swap left sys.stdout redirected on failure, silencing all later
        # test output in the session.
        sys.stdout = stdout
    out.seek(0)
    printed = out.read()
    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
def test_print_config_files():
    """
    Capture ``print_config`` output and check it reports the time format,
    the first config file, and the download/sample directories.
    """
    stdout = sys.stdout
    out = io.StringIO()
    sys.stdout = out
    try:
        print_config()
    finally:
        # BUG FIX: restore stdout even if print_config() raises; the original
        # swap left sys.stdout redirected on failure, silencing all later
        # test output in the session.
        sys.stdout = stdout
    out.seek(0)
    printed = out.read()
    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
def get_sample_file(filename, url_list, show_progress=True, overwrite=False,
                    timeout=None):
    """
    Downloads a sample file. Will download a sample data file and move it to
    the sample data directory. Also, uncompresses zip files if necessary.
    Returns the local file if exists.

    Parameters
    ----------
    filename: `str`
        Name of the file
    url_list: `str` or `list`
        urls where to look for the file
    show_progress: `bool`
        Show a progress bar during download
    overwrite: `bool`
        If True download and overwrite an existing file.
    timeout: `float`
        The timeout in seconds. If `None` the default timeout is used from
        `astropy.utils.data.Conf.remote_timeout`.

    Returns
    -------
    result: `str`
        The local path of the file. None if it failed.
    """
    # Creating the directory for sample files to be downloaded
    sampledata_dir = get_and_create_sample_dir()
    if filename[-3:] == 'zip':
        uncompressed_filename = filename[:-4]
    else:
        uncompressed_filename = filename
    # check if the (uncompressed) file exists
    if not overwrite and os.path.isfile(
            os.path.join(sampledata_dir, uncompressed_filename)):
        return os.path.join(sampledata_dir, uncompressed_filename)
    else:
        # check each provided url to find the file
        for base_url in url_list:
            online_filename = filename
            if 'github' in base_url:
                # GitHub serves raw file content only with this query string.
                online_filename += '?raw=true'
            try:
                url = urljoin(base_url, online_filename)
                exists = url_exists(url)
                if exists:
                    # BUG FIX: download the same URL that was just probed.
                    # The original passed os.path.join(base_url,
                    # online_filename) to download_file, which is not
                    # URL-safe (backslash separator on Windows) and can
                    # differ from the urljoin() result checked above.
                    f = download_file(url,
                                      show_progress=show_progress,
                                      timeout=timeout)
                    real_name, ext = os.path.splitext(f)
                    if ext == '.zip':
                        print("Unpacking: {}".format(real_name))
                        with ZipFile(f, 'r') as zip_file:
                            unzipped_f = zip_file.extract(real_name,
                                                          sampledata_dir)
                        os.remove(f)
                        move(unzipped_f,
                             os.path.join(sampledata_dir,
                                          uncompressed_filename))
                        return os.path.join(sampledata_dir,
                                            uncompressed_filename)
                    else:
                        # move files to the data directory
                        move(f, os.path.join(sampledata_dir,
                                             uncompressed_filename))
                        return os.path.join(sampledata_dir,
                                            uncompressed_filename)
            except (socket.error, socket.timeout) as e:
                warnings.warn("Download failed with error {}. "
                              "Retrying with different mirror.".format(e),
                              SunpyUserWarning)
    # if reach here then file has not been downloaded.
    warnings.warn("File {} not found.".format(filename), SunpyUserWarning)
    return None
"SWAP_LEVEL1_IMAGE": "swap_lv1_20110607_063329.fits", "AIA_171_ROLL_IMAGE": "aiacalibim5.fits.gz", "EVE_TIMESERIES": "20110607_EVE_L0CS_DIODES_1m.txt", # Uncomment this if it needs to be used. Commented out to save bandwidth. # "LYRA_LIGHTCURVE": ("lyra_20110810-000000_lev2_std.fits.gz", , "LYRA_LEVEL3_TIMESERIES": "lyra_20110607-000000_lev3_std.fits", "GOES_XRS_TIMESERIES": "go1520110607.fits", "GBM_TIMESERIES": "glg_cspec_n5_110607_v00.pha", "NOAAINDICES_TIMESERIES": "swpc_solar_cycle_indices.txt", "NOAAPREDICT_TIMESERIES": "predicted-sunspot-radio-flux.txt", "RHESSI_TIMESERIES": "hsi_obssumm_20110607_025.fits", "NORH_TIMESERIES": "tca110607.fits" } # Creating the directory for sample files to be downloaded sampledata_dir = get_and_create_sample_dir() def download_sample_data(show_progress=True): """ Download all sample data at once. This will overwrite any existing files. Parameters ---------- show_progress: `bool` Show a progress bar during download Returns ------- None """
def get_sample_file(filename, url_list, show_progress=True, overwrite=False,
                    timeout=None):
    """
    Download a single sample data file into the sample data directory,
    unpacking zip archives as needed.

    If the (uncompressed) file already exists and ``overwrite`` is False,
    the cached local copy is returned without any network access.

    Parameters
    ----------
    filename: `str`
        Name of the file
    url_list: `str` or `list`
        urls where to look for the file
    show_progress: `bool`
        Show a progress bar during download
    overwrite: `bool`
        If True download and overwrite an existing file.
    timeout: `float`
        The timeout in seconds. If `None` the default timeout is used from
        `astropy.utils.data.Conf.remote_timeout`.

    Returns
    -------
    result: `str`
        The local path of the file. None if it failed.
    """
    # Creating the directory for sample files to be downloaded
    sampledata_dir = get_and_create_sample_dir()
    # Strip a trailing ".zip" to get the name the file will have on disk.
    uncompressed_filename = filename[:-4] if filename[-3:] == 'zip' else filename
    local_path = os.path.join(sampledata_dir, uncompressed_filename)
    # check if the (uncompressed) file exists
    if not overwrite and os.path.isfile(local_path):
        return local_path
    # check each provided url to find the file
    for base_url in url_list:
        online_filename = filename
        if 'github' in base_url:
            online_filename += '?raw=true'
        try:
            url = urljoin(base_url, online_filename)
            if url_exists(url):
                f = download_file(os.path.join(base_url, online_filename),
                                  show_progress=show_progress,
                                  timeout=timeout)
                real_name, ext = os.path.splitext(f)
                if ext == '.zip':
                    print("Unpacking: {}".format(real_name))
                    with ZipFile(f, 'r') as zip_file:
                        unzipped_f = zip_file.extract(real_name, sampledata_dir)
                    os.remove(f)
                    move(unzipped_f, local_path)
                else:
                    # move files to the data directory
                    move(f, local_path)
                return local_path
        except (socket.error, socket.timeout) as e:
            warnings.warn("Download failed with error {}. \n"
                          "Retrying with different mirror.".format(e))
    # if reach here then file has not been downloaded.
    warnings.warn("File {} not found.".format(filename))
    return None
def download_sample_data(overwrite=False):
    """
    Download all sample data at once. This will overwrite any existing files.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option
    env_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if env_dir:
        sampledata_dir = Path(env_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    downloader = Downloader(overwrite=overwrite)
    primary_url = _base_urls[0]
    already_downloaded = []
    for remote_name in _sample_files.keys():
        local_file = sampledata_dir / remote_name
        # We have to avoid calling download if we already have all the files.
        if local_file.exists() and not overwrite:
            already_downloaded.append(local_file)
        else:
            downloader.enqueue_file(urljoin(primary_url, remote_name),
                                    filename=local_file)

    if not downloader.queued_downloads:
        return already_downloaded
    results = downloader.download()
    if not results.errors:
        return results
    log.info('Failed to download one or more sample data files, retrying with a mirror.')

    # Re-point each failed download at the next mirror and retry, stopping as
    # soon as there are no errors left.
    for mirror_url in _base_urls[1:]:
        for idx, err in enumerate(results.errors):
            failed = err.filepath_partial().name
            log.debug(
                f"Failed to download {_sample_files[failed]} from {err.url}: {err.exception}")
            # Overwrite the parfive error to change the url to a mirror
            results._errors[idx] = _error(err.filepath_partial,
                                          urljoin(mirror_url, failed),
                                          err.exception)
        results = downloader.retry(results)
        if not results.errors:
            return results

    for err in results.errors:
        failed = err.filepath_partial().name
        log.debug(
            f"Failed to download {_sample_files[failed]} from {err.url}: {err.exception}")
        log.error(
            f"Failed to download {_sample_files[failed]} from all mirrors, the file will not be available.")
    return results