def download_sample_data(overwrite=False):
    """
    Download all sample data at once.

    This will overwrite any existing files.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option
    env_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if env_dir:
        sampledata_dir = Path(env_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    downloader = Downloader(overwrite=overwrite)
    primary_url = _base_urls[0]
    already_downloaded = []
    for file_name in _sample_files:
        target = sampledata_dir / file_name
        # We have to avoid calling download if we already have all the files.
        if target.exists() and not overwrite:
            already_downloaded.append(target)
        else:
            downloader.enqueue_file(urljoin(primary_url, file_name), filename=target)

    # Nothing queued means every file was already on disk.
    if not downloader.queued_downloads:
        return already_downloaded

    results = downloader.download()
    if not results.errors:
        return results
    log.info('Failed to download one or more sample data files, retrying with a mirror.')

    # Walk the remaining mirrors, rewriting each failed download's URL and retrying.
    for retry_url in _base_urls[1:]:
        for i, err in enumerate(results.errors):
            file_name = err.filepath_partial().name
            log.debug(
                f"Failed to download {_sample_files[file_name]} from {err.url}: {err.exception}")
            # Overwrite the parfive error to change the url to a mirror
            results._errors[i] = _error(
                err.filepath_partial, urljoin(retry_url, file_name), err.exception)
        results = downloader.retry(results)
        if not results.errors:
            return results

    # Every mirror failed for these files; report and return what we have.
    for err in results.errors:
        file_name = err.filepath_partial().name
        log.debug(
            f"Failed to download {_sample_files[file_name]} from {err.url}: {err.exception}")
        log.error(
            f"Failed to download {_sample_files[file_name]} from all mirrors, the file will not be available.")
    return results
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.base_client.QueryResponseTable` or
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse` or `~sunpy.net.base_client.QueryResponseTable`
        Container returned by query method, or multiple.
    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in `~sunpy.net.base_client.BaseQueryResponse.response_block_properties`
        via string formatting, moreover the file-name of the file downloaded
        can be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    max_conn : `int`, optional
        The number of parallel download slots.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if ``'unique'`` the filename
        will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Examples
    --------
    >>> from sunpy.net.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT')) # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp) # doctest: +SKIP

    If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths) # doctest: +SKIP
    """
    if path is None:
        path = Path(config.get('downloads', 'download_dir')) / '{file}'
    elif isinstance(path, (str, os.PathLike)) and '{file}' not in str(path):
        path = Path(path) / '{file}'
    else:
        path = Path(path)
    path = path.expanduser()

    # Ensure we have write permissions to the path: check the closest
    # existing ancestor of the target directory.
    exists = list(filter(lambda p: p.exists(), Path(path).resolve().parents))
    if not os.access(exists[0], os.W_OK):
        raise PermissionError('You do not have permission to write'
                              f' to the directory {exists[0]}.')

    if "wait" in kwargs:
        raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

    # TODO: Remove when parfive allows us to special case URLS.
    # Avoid more than one connection for JSOC only requests.
    from sunpy.net.jsoc import JSOCClient

    def _is_jsoc(query_result):
        # A query result counts as JSOC only if every one of its blocks
        # is served by the JSOC client.  Anything unrecognized here
        # (e.g. a QueryResponseRow or a Results retry object) is treated
        # as non-JSOC so we never wrongly throttle or wrongly parallelize.
        if isinstance(query_result, UnifiedResponse):
            return all(isinstance(result.client, JSOCClient) for result in query_result)
        if isinstance(query_result, QueryResponseTable):
            return all(isinstance(result.table.client, JSOCClient) for result in query_result)
        return False

    # BUG FIX: the previous loop overwrote is_jsoc_only on every iteration,
    # so only the *last* query result decided.  The throttle must apply
    # exactly when every supplied query result is JSOC-only.
    is_jsoc_only = bool(query_results) and all(_is_jsoc(qr) for qr in query_results)

    if downloader is None:
        if is_jsoc_only:
            max_conn = 1
            kwargs['max_splits'] = 1
        downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
    elif not isinstance(downloader, parfive.Downloader):
        raise TypeError("The downloader argument must be a parfive.Downloader instance.")

    # Handle retrying failed downloads: either every argument is a
    # parfive.Results (retry them all and merge), or none may be.
    retries = [isinstance(arg, Results) for arg in query_results]
    if all(retries):
        results = Results()
        for retry in query_results:
            dr = downloader.retry(retry)
            results.data += dr.data
            results._errors += dr._errors
        return results
    elif any(retries):
        raise TypeError("If any arguments to fetch are `parfive.Results` objects, all arguments must be.")

    reslist = []
    for query_result in query_results:
        # Normalize each argument to an iterable of response tables.
        if isinstance(query_result, QueryResponseRow):
            responses = [query_result.as_table()]
        elif isinstance(query_result, QueryResponseTable):
            responses = [query_result]
        elif isinstance(query_result, UnifiedResponse):
            responses = query_result
        else:
            raise ValueError(f"Query result has an unrecognized type: {type(query_result)} "
                             "Allowed types are QueryResponseRow, QueryResponseTable or UnifiedResponse.")
        for block in responses:
            result = block.client.fetch(block, path=path,
                                        downloader=downloader,
                                        wait=False, **kwargs)
            if result not in (NotImplemented, None):
                reslist.append(result)

    results = downloader.download()
    # Combine the results objects from all the clients into one Results
    # object.
    for result in reslist:
        if not isinstance(result, Results):
            raise TypeError(
                "If wait is False a client must return a parfive.Downloader and either None"
                " or a parfive.Results object.")
        results.data += result.data
        results._errors += result.errors
    return results
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse`
        Container returned by query method, or multiple.
    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in `UnifiedResponse.response_block_properties` via string formatting,
        moreover the file-name of the file downloaded can be referred to as
        file, e.g. "{source}/{instrument}/{time.start}/{file}".
    max_conn : `int`, optional
        The number of parallel download slots.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Examples
    --------
    >>> from sunpy.net.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT')) # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp) # doctest: +SKIP

    If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths) # doctest: +SKIP
    """
    # Verify we can write into (the closest existing ancestor of) the
    # requested path before queueing anything.
    if path is not None:
        existing_parents = [p for p in Path(path).resolve().parents if p.exists()]
        if not os.access(existing_parents[0], os.W_OK):
            raise PermissionError('You do not have permission to write'
                                  f' to the directory {existing_parents[0]}.')

    if "wait" in kwargs:
        raise ValueError(
            "wait is not a valid keyword argument to Fido.fetch.")

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
    elif not isinstance(downloader, parfive.Downloader):
        raise TypeError(
            "The downloader argument must be a parfive.Downloader object.")

    # Handle retrying failed downloads: either all arguments are
    # parfive.Results objects (merge the retries), or none may be.
    retry_flags = [isinstance(arg, Results) for arg in query_results]
    if all(retry_flags):
        merged = Results()
        for prior in query_results:
            retried = downloader.retry(prior)
            merged.data += retried.data
            merged._errors += retried._errors
        return merged
    if any(retry_flags):
        raise TypeError(
            "If any arguments to fetch are "
            "`parfive.Results` objects, all arguments must be.")

    # Ask every client to enqueue its files on our shared downloader.
    pending = [
        block.client.fetch(block, path=path, downloader=downloader, wait=False, **kwargs)
        for query_result in query_results
        for block in query_result.responses
    ]

    results = downloader.download()

    # Combine the results objects from all the clients into one Results
    # object.
    for client_result in pending:
        if client_result is None:
            continue
        if not isinstance(client_result, Results):
            raise TypeError(
                "If wait is False a client must return a parfive.Downloader and either None"
                " or a parfive.Results object.")
        results.data += client_result.data
        results._errors += client_result.errors
    return results