def get_request(self, requests, path=None, overwrite=False, progress=True,
                downloader=None, wait=True, max_conn=4, **kwargs):
    """
    Query JSOC to see if the request(s) is ready for download.

    If the request is ready for download, it will then download it.

    Parameters
    ----------
    requests : `~drms.ExportRequest`, `str`, `list`
        `~drms.ExportRequest` objects or `str` request IDs or lists
        returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.
    path : `str`
        Path to save data to, defaults to SunPy download dir.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and
        the path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if
        ``'unique'`` the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.
    max_conn : `int`, optional
        The maximum number of parallel download connections.

    Returns
    -------
    res : `parfive.Results`
        A `parfive.Results` instance or `None` if no URLs to download.
    """
    c = drms.Client()

    kwargs['max_splits'] = kwargs.get('max_splits', 2)

    # Convert Responses to a list if not already
    if isinstance(requests, str) or not isiterable(requests):
        requests = [requests]

    # Ensure all the requests are drms ExportRequest objects
    for i, request in enumerate(requests):
        if isinstance(request, str):
            r = c.export_from_id(request)
            requests[i] = r

    # We only download if all are finished
    if not all(r.has_succeeded() for r in requests):
        raise NotExportedError("Can not download as not all the requests "
                               "have been exported for download yet.")

    # Ensure path has a {file} in it
    if path is None:
        default_dir = config.get("downloads", "download_dir")
        path = os.path.join(default_dir, '{file}')
    elif isinstance(path, Path):
        path = str(path)

    if isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')

    paths = []
    for request in requests:
        if request.method == 'url-tar':
            fname = path.format(file=Path(request.tarfile).name)
            paths.append(os.path.expanduser(fname))
        else:
            for filename in request.data['filename']:
                # Ensure we don't duplicate the file extension
                ext = os.path.splitext(filename)[1]
                if path.endswith(ext):
                    # Slice off the trailing extension; ``str.strip(ext)``
                    # would wrongly remove any of the extension's
                    # characters from both ends of the path.
                    fname = path[:-len(ext)]
                else:
                    fname = path
                fname = fname.format(file=filename)
                fname = os.path.expanduser(fname)
                paths.append(fname)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress, overwrite=overwrite,
                                max_conn=max_conn)

    if downloader.max_conn * kwargs['max_splits'] > 10:
        warnings.warn(
            "JSOC does not support more than 10 parallel connections. "
            "Changing the number of parallel connections to 8.",
            SunpyUserWarning)
        kwargs['max_splits'] = 2
        downloader.max_conn = 4

    urls = []
    for request in requests:
        if request.status == 0:
            if request.protocol == 'as-is' or request.method == 'url-tar':
                urls.extend(list(request.urls.url))
            else:
                for index, data in request.data.iterrows():
                    url_dir = request.request_url + '/'
                    urls.append(urllib.parse.urljoin(url_dir, data['filename']))

    if urls:
        if progress:
            print_message = "{0} URLs found for download. Full request totalling {1}MB"
            print(print_message.format(len(urls), request._d['size']))
        for aurl, fname in zip(urls, paths):
            downloader.enqueue_file(aurl, filename=fname, **kwargs)

    if dl_set and not wait:
        return Results()

    results = downloader.download()
    return results
def fetch(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory.

    Returns
    -------
    `parfive.Results`
        A list of the downloaded file paths and any errors.
    """
    urls = [qrblock['url'] for qrblock in qres]

    filenames = []
    local_filenames = []

    for i, [url, qre] in enumerate(zip(urls, qres)):
        name = url.split('/')[-1]
        day = Time(qre['Time'].start.strftime('%Y-%m-%d')) + TimeDelta(i * u.day)

        if name not in filenames:
            filenames.append(name)

        if name.endswith('.gz'):
            local_filenames.append('{}SRS.txt'.format(day.strftime('%Y%m%d')))
        else:
            local_filenames.append(name)

    if path is not None:
        path = pathlib.Path(path)

    # Files to be actually downloaded
    paths = self._get_full_filenames(qres, filenames, path)

    # Those files that will be present after get returns
    local_paths = self._get_full_filenames(qres, local_filenames, path)

    # Remove duplicate URLs so that paths and urls have the same number of
    # elements. OrderedDict is required to maintain ordering because it
    # will be zipped with paths later.
    urls = list(OrderedDict.fromkeys(urls))

    dobj = Downloader(max_conn=5)

    for aurl, fname in zip(urls, paths):
        dobj.enqueue_file(aurl, filename=fname)

    paths = dobj.download()

    outfiles = []
    for fname, srs_filename in zip(local_paths, local_filenames):
        name = fname.name

        past_year = False
        for fname2 in paths:
            fname2 = pathlib.Path(fname2)

            if fname2.name.endswith('.txt'):
                continue

            year = fname2.name.split('_SRS')[0]
            if year in name:
                # Extract the single SRS file for this day from the
                # yearly tar archive.
                with tarfile.open(fname2) as tar:
                    filepath = fname.parent
                    member = tar.getmember('SRS/' + srs_filename)
                    member.name = name
                    tar.extract(member, path=filepath)
                outfiles.append(fname)
                past_year = True
                break

        if past_year is False:
            outfiles.append(fname)

    paths.data = list(map(str, outfiles))
    return paths
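
# Illustrative usage sketch (not part of the original module): downloading
# NOAA SRS reports with the fetch method above. Recent days arrive as plain
# ``.txt`` files; past years are extracted from yearly tar archives. The
# time range and path are placeholders.
def _example_srs_fetch():
    from sunpy.net import attrs as a
    from sunpy.net.dataretriever import SRSClient

    client = SRSClient()
    qres = client.search(a.Time('2016/1/1', '2016/1/2'))
    return client.fetch(qres, path='/tmp/srs')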
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.base_client.QueryResponseTable` or
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse` or `~sunpy.net.base_client.QueryResponseTable`
        Container returned by query method, or multiple.
    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in `~sunpy.net.base_client.BaseQueryResponse.response_block_properties`
        via string formatting, moreover the file-name of the file downloaded
        can be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    max_conn : `int`, optional
        The number of parallel download slots.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and
        the path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if
        ``'unique'`` the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Examples
    --------
    >>> from sunpy.net.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4', '2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

    If any downloads fail, they can be retried by passing the
    `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP
    """
    if path is None:
        path = Path(config.get('downloads', 'download_dir')) / '{file}'
    elif isinstance(path, (str, os.PathLike)) and '{file}' not in str(path):
        path = Path(path) / '{file}'
    else:
        path = Path(path)
    path = path.expanduser()

    # Ensure we have write permissions to the path
    exists = list(filter(lambda p: p.exists(), Path(path).resolve().parents))
    if not os.access(exists[0], os.W_OK):
        raise PermissionError('You do not have permission to write'
                              f' to the directory {exists[0]}.')

    if "wait" in kwargs:
        raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

    # TODO: Remove when parfive allows us to special case URLs.
    # Avoid more than one connection for JSOC only requests.
    from sunpy.net.jsoc import JSOCClient

    is_jsoc_only = False
    for query_result in query_results:
        if isinstance(query_result, UnifiedResponse):
            is_jsoc_only = all(isinstance(result.client, JSOCClient)
                               for result in query_result)
        elif isinstance(query_result, QueryResponseTable):
            is_jsoc_only = all(isinstance(result.table.client, JSOCClient)
                               for result in query_result)

    if downloader is None:
        if is_jsoc_only:
            max_conn = 1
            kwargs['max_splits'] = 1
        downloader = Downloader(max_conn=max_conn, progress=progress,
                                overwrite=overwrite)
    elif not isinstance(downloader, parfive.Downloader):
        raise TypeError("The downloader argument must be a parfive.Downloader instance.")

    # Handle retrying failed downloads
    retries = [isinstance(arg, Results) for arg in query_results]
    if all(retries):
        results = Results()
        for retry in query_results:
            dr = downloader.retry(retry)
            results.data += dr.data
            results._errors += dr._errors
        return results
    elif any(retries):
        raise TypeError("If any arguments to fetch are `parfive.Results` "
                        "objects, all arguments must be.")

    reslist = []
    for query_result in query_results:
        if isinstance(query_result, QueryResponseRow):
            responses = [query_result.as_table()]
        elif isinstance(query_result, QueryResponseTable):
            responses = [query_result]
        elif isinstance(query_result, UnifiedResponse):
            responses = query_result
        else:
            raise ValueError(f"Query result has an unrecognized type: {type(query_result)} "
                             "Allowed types are QueryResponseRow, QueryResponseTable or UnifiedResponse.")
        for block in responses:
            result = block.client.fetch(block, path=path,
                                        downloader=downloader,
                                        wait=False, **kwargs)
            if result not in (NotImplemented, None):
                reslist.append(result)

    results = downloader.download()
    # Combine the results objects from all the clients into one Results
    # object.
    for result in reslist:
        if not isinstance(result, Results):
            raise TypeError("If wait is False a client must return a "
                            "parfive.Downloader and either None or a "
                            "parfive.Results object.")
        results.data += result.data
        results._errors += result.errors

    return results
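
# Illustrative usage sketch (not part of the original module): steering
# downloads into a per-instrument tree through the ``path`` template; as the
# code above shows, ``{file}`` is appended automatically when the template
# omits it. The directory layout is a placeholder.
def _example_fido_fetch_path():
    from sunpy.net import Fido, attrs as a

    unifresp = Fido.search(a.Time('2012/3/4', '2012/3/5'), a.Instrument('EIT'))
    # {instrument} is filled per response block, {file} per downloaded file.
    return Fido.fetch(unifresp, path='./data/{instrument}/{file}')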
def fetch(self, qres, path=None, overwrite=False,
          progress=True, downloader=None, wait=True, **kwargs):
    """
    Download a set of results.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory, or file template including the
        ``{file}`` string which will be replaced with the filename.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and
        the path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if
        ``'unique'`` the filename will be modified to be unique.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.

    Returns
    -------
    `parfive.Results`
    """
    if path is not None:
        path = Path(path)

    if isinstance(qres, QueryResponseRow):
        qres = qres.as_table()

    urls = []
    if len(qres):
        urls = list(qres['url'])

    filenames = [url.split('/')[-1] for url in urls]

    paths = self._get_full_filenames(qres, filenames, path)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress, overwrite=overwrite)

    for url, filename in zip(urls, paths):
        downloader.enqueue_file(url, filename=filename)

    if dl_set and not wait:
        return

    return downloader.download()
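
# Illustrative sketch (not part of the original module) of the ``wait=False``
# contract above: a caller-owned parfive.Downloader collects the files queued
# by several clients, then downloads them in one pass. The client and query
# arguments are hypothetical.
def _example_shared_downloader(client_a, qres_a, client_b, qres_b):
    from parfive import Downloader

    dl = Downloader(max_conn=5, progress=True)
    # With wait=False and an external downloader, fetch only enqueues files.
    client_a.fetch(qres_a, downloader=dl, wait=False)
    client_b.fetch(qres_b, downloader=dl, wait=False)
    # A single download() call processes everything that was queued.
    return dl.download()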
def fetch(self, query_response, path=None, methods=None, site=None,
          progress=True, overwrite=False, downloader=None, wait=True):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : `sunpy.net.vso.VSOQueryResponseTable`
        QueryResponse containing the items to be downloaded.
    path : `str`
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...)
        via string formatting, moreover the file-name of the file
        downloaded can be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    methods : `list` of `str`
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number
        of SUFFIXES, i.e. ``PREFIX-SUFFIX_SUFFIX2_SUFFIX3``.
        The full list of
        `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.
    site : `str`
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO (aka CFA)   Smithsonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut für Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================

    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and
        the path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if
        ``'unique'`` the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if ``downloader`` is not `None`.

    Returns
    -------
    out : `parfive.Results`
        Object that supplies a list of filenames and any errors.

    Examples
    --------
    >>> files = fetch(qr)  # doctest:+SKIP
    """
    if path is None:
        path = Path(config.get('downloads', 'download_dir')) / '{file}'
    elif isinstance(path, (str, os.PathLike)) and '{file}' not in str(path):
        path = Path(path) / '{file}'
    else:
        path = Path(path)
    path = path.expanduser()

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress, overwrite=overwrite)

    if isinstance(query_response, (QueryResponse, list)):
        query_response = VSOQueryResponseTable.from_zeep_response(query_response,
                                                                  client=self,
                                                                  _sort=False)
    if isinstance(query_response, QueryResponseRow):
        query_response = query_response.as_table()

    if not len(query_response):
        return downloader.download() if wait else Results()

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")

    data_request = self.make_getdatarequest(query_response, methods, info)
    data_response = VSOGetDataResponse(self.api.service.GetData(data_request))

    err_results = self.download_all(data_response, methods, downloader,
                                    str(path), self.by_fileid(query_response))

    if dl_set and not wait:
        return err_results

    results = downloader.download()
    results += err_results
    results._errors += err_results.errors
    return results
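
# Illustrative usage sketch (not part of the original module): requesting
# Rice-compressed files from a specific VSO mirror. The query, path and
# mirror choice are placeholders.
def _example_vso_fetch():
    from sunpy.net import attrs as a
    from sunpy.net.vso import VSOClient

    client = VSOClient()
    qr = client.search(a.Time('2011/1/1', '2011/1/1 00:10'), a.Instrument('AIA'))
    # Prefer Rice-compressed files, falling back to plain URL-FILE.
    return client.fetch(qr, path='/tmp/vso/{file}', site='SDAC',
                        methods=['URL-FILE_Rice', 'URL-FILE'])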
def download_sample_data(overwrite=False):
    """
    Download all sample data at once.

    Existing files are re-downloaded only if ``overwrite`` is `True`.

    Parameters
    ----------
    overwrite : `bool`
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option.
    sampledata_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if sampledata_dir:
        sampledata_dir = Path(sampledata_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Create the directory for the sample files to be downloaded into.
        sampledata_dir = Path(get_and_create_sample_dir())

    dl = Downloader(overwrite=overwrite)

    first_url = _base_urls[0]

    already_downloaded = []
    for file_name in _sample_files.keys():
        url = urljoin(first_url, file_name)
        fname = sampledata_dir / file_name
        # We have to avoid calling download if we already have all the files.
        if fname.exists() and not overwrite:
            already_downloaded.append(fname)
        else:
            dl.enqueue_file(url, filename=fname)

    if dl.queued_downloads:
        results = dl.download()
    else:
        return already_downloaded

    if not results.errors:
        return results
    else:
        log.info('Failed to download one or more sample data files, '
                 'retrying with a mirror.')

    for retry_url in _base_urls[1:]:
        for i, err in enumerate(results.errors):
            file_name = err.filepath_partial().name
            log.debug(f"Failed to download {_sample_files[file_name]} "
                      f"from {err.url}: {err.exception}")
            # Overwrite the parfive error to change the url to a mirror
            new_url = urljoin(retry_url, file_name)
            results._errors[i] = _error(err.filepath_partial, new_url,
                                        err.exception)

        results = dl.retry(results)

        if not results.errors:
            return results

    for err in results.errors:
        file_name = err.filepath_partial().name
        log.debug(f"Failed to download {_sample_files[file_name]} "
                  f"from {err.url}: {err.exception}")
        log.error(f"Failed to download {_sample_files[file_name]} from all "
                  "mirrors, the file will not be available.")

    return results
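
# Illustrative sketch (not part of the original module): forcing a fresh copy
# of the sample files and inspecting leftover failures. The import path is an
# assumption about where this function is publicly exposed.
def _example_refresh_sample_data():
    from sunpy.data import download_sample_data  # assumed public location

    results = download_sample_data(overwrite=True)
    # A plain list means every file was already present; a parfive.Results
    # exposes .errors for files that failed on all mirrors.
    if hasattr(results, 'errors') and results.errors:
        print(f"{len(results.errors)} sample files could not be downloaded.")
    return results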
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse`
        Container returned by query method, or multiple.
    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in ``UnifiedResponse.response_block_properties`` via string
        formatting, moreover the file-name of the file downloaded can be
        referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".
    max_conn : `int`, optional
        The number of parallel download slots.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with
        the same name. If `False` the file download will be skipped and
        the path returned to the existing file, if `True` the file will be
        downloaded and the existing file will be overwritten, if
        ``'unique'`` the filename will be modified to be unique.
    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Examples
    --------
    >>> from sunpy.net.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4', '2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

    If any downloads fail, they can be retried by passing the
    `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP
    """
    if path is not None:
        exists = list(filter(lambda p: p.exists(), Path(path).resolve().parents))
        if not os.access(exists[0], os.W_OK):
            raise PermissionError('You do not have permission to write'
                                  f' to the directory {exists[0]}.')

    if "wait" in kwargs:
        raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, progress=progress,
                                overwrite=overwrite)
    elif not isinstance(downloader, parfive.Downloader):
        raise TypeError("The downloader argument must be a parfive.Downloader object.")

    # Handle retrying failed downloads
    retries = [isinstance(arg, Results) for arg in query_results]
    if all(retries):
        results = Results()
        for retry in query_results:
            dr = downloader.retry(retry)
            results.data += dr.data
            results._errors += dr._errors
        return results
    elif any(retries):
        raise TypeError("If any arguments to fetch are `parfive.Results` "
                        "objects, all arguments must be.")

    reslist = []
    for query_result in query_results:
        for block in query_result.responses:
            reslist.append(block.client.fetch(block, path=path,
                                              downloader=downloader,
                                              wait=False, **kwargs))

    results = downloader.download()
    # Combine the results objects from all the clients into one Results
    # object.
    for result in reslist:
        if result is None:
            continue
        if not isinstance(result, Results):
            raise TypeError("If wait is False a client must return a "
                            "parfive.Downloader and either None or a "
                            "parfive.Results object.")
        results.data += result.data
        results._errors += result.errors

    return results
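
# Illustrative sketch (not part of the original module): the retry pattern
# from the docstring above, with a bounded number of attempts so a
# permanently failing file cannot loop forever.
def _example_fido_retry():
    from sunpy.net import Fido, attrs as a

    unifresp = Fido.search(a.Time('2012/3/4', '2012/3/5'), a.Instrument('EIT'))
    files = Fido.fetch(unifresp)
    for _ in range(3):
        if not files.errors:
            break
        # Passing the Results object back in retries only the failures.
        files = Fido.fetch(files)
    return files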