def test_enable_no_aiofiles():
    """
    Check that ``use_aiofiles`` stays disabled on the downloader config.

    Asking for aiofiles must emit a `ParfiveUserWarning` and still leave the
    flag off; not asking for it must leave the flag off as well.

    NOTE(review): presumably this runs with aiofiles made unimportable by a
    patch or fixture that is not visible in this block -- confirm.
    """
    # The config is built inside the ``warns`` context on purpose: the
    # warning may be raised while the config object is being created.
    with pytest.warns(ParfiveUserWarning):
        requested_on = Downloader(config=parfive.SessionConfig(use_aiofiles=True))
    assert requested_on.config.use_aiofiles is False

    requested_off = Downloader(config=parfive.SessionConfig(use_aiofiles=False))
    assert requested_off.config.use_aiofiles is False
def run(self):
    """
    Download every listed file that is missing or fails its MD5 check.

    Reads ``self.listfiles`` (a mapping whose values have ``"url"`` and
    ``"path"`` keys and optionally ``"hashmd5"``), ``self.outfolder`` and
    ``self.progress``. A file is skipped only when an MD5 is listed, the
    local file exists and its digest matches. Download errors are logged,
    not raised. Returns `None`; if parfive is not importable an error is
    logged and nothing is downloaded.
    """
    try:
        from parfive import Downloader
    except ImportError:
        log.error("To use gammapy download, install the parfive package!")
        return

    if self.listfiles:
        log.info(f"Content will be downloaded in {self.outfolder}")

    queue = Downloader(progress=self.progress, file_progress=False)
    for entry in self.listfiles.values():
        url = entry["url"]
        target = self.outfolder / entry["path"]
        expected_md5 = entry.get("hashmd5", "")

        # Short-circuits so the file is only read when a checksum is listed
        # and the file is actually present.
        up_to_date = (
            bool(expected_md5)
            and target.exists()
            and hashlib.md5(target.read_bytes()).hexdigest() == expected_md5
        )
        if not up_to_date:
            queue.enqueue_file(url, path=str(target.parent))

    log.info(f"{queue.queued_downloads} files to download.")
    outcome = queue.download()
    log.info(f"{len(outcome)} files downloaded.")
    for _, _, exception in outcome.errors:
        log.error(f"Error: {exception}")
def fetch(self, qres, path=None, overwrite=False,
          progress=True, downloader=None, wait=True):
    """
    Queue the URL of every block in ``qres`` and, unless told not to, download.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download; each item must expose a ``url`` attribute.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory, or file template including the
        ``{file}`` string which will be replaced with the filename.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique. Only used when no ``downloader`` is
        passed in.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
        Only used when no ``downloader`` is passed in.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if `downloader` is not `None`.

    Returns
    -------
    results : `parfive.Results`
        The value of ``downloader.download()``, or `None` when a downloader
        was supplied and ``wait`` is `False`.
    """
    download_dir = None if path is None else Path(path)

    urls = [block.url for block in qres]
    # The local name of each file is the last component of its URL.
    basenames = [url.rsplit('/', 1)[-1] for url in urls]
    targets = self._get_full_filenames(qres, basenames, download_dir)

    caller_owns_downloader = bool(downloader)
    if not caller_owns_downloader:
        downloader = Downloader(progress=progress, overwrite=overwrite)

    for url, target in zip(urls, targets):
        downloader.enqueue_file(url, filename=target)

    # Only a caller-supplied downloader may be left un-run.
    if wait or not caller_owns_downloader:
        return downloader.download()
    return None
def fetch(self, qres, path=None, overwrite=False,
          progress=True, downloader=None, wait=True):
    """
    Queue the URL of every block in ``qres.blocks`` and optionally download.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download; ``qres.blocks`` is iterated and each block must
        expose a ``url`` attribute.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory, or file template including the
        ``{file}`` string which will be replaced with the filename.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique. Only used when no ``downloader`` is
        passed in.
    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.
        Only used when no ``downloader`` is passed in.
    downloader : `parfive.Downloader`, optional
        The download manager to use.
    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if `downloader` is not `None`.

    Returns
    -------
    results : `parfive.Results`
        The value of ``downloader.download()``, or `None` when a downloader
        was supplied and ``wait`` is `False`.
    """
    if path is not None:
        path = Path(path)

    urls = []
    filenames = []
    for qrblock in qres.blocks:
        urls.append(qrblock.url)
        # Local file name is whatever follows the final slash of the URL.
        filenames.append(qrblock.url.split('/')[-1])

    paths = self._get_full_filenames(qres, filenames, path)

    supplied = True if downloader else False
    if not supplied:
        downloader = Downloader(progress=progress, overwrite=overwrite)

    for pair in zip(urls, paths):
        downloader.enqueue_file(pair[0], filename=pair[1])

    # Leave the queue untouched when the caller manages the downloader.
    if supplied and not wait:
        return

    return downloader.download()
def download_files_parfive(list_of_files, pathy="/Users/laurahayes/ionospheric_work/ionospheric-analysis/vlf_codes/vlf_bas_files/"):
    """
    Download a list of URLs into one directory using a parfive `Downloader`.

    Parameters
    ----------
    list_of_files : iterable of `str`
        URLs of the files to download.
    pathy : `str`, optional
        Directory the files are downloaded into. The default is a
        machine-specific absolute path, so callers on other machines should
        always pass this explicitly.

    Returns
    -------
    The object returned by ``Downloader.download()``, so that callers can
    see which files were written and which downloads failed. (Previously
    the result was discarded and `None` was returned.)
    """
    dl = Downloader()
    for url in list_of_files:
        dl.enqueue_file(url, path=pathy)
    return dl.download()
def test_multipart_with_error(multipartserver, tmp_path):
    """
    A multi-part download where one request errors must fail as a whole.

    The server callback is set to ``error_on_nth_request`` with ``3``; the
    download is then expected to yield no files, exactly one
    `MultiPartDownloadError`, and no ``testfile.txt`` in ``tmp_path``.
    """
    multipartserver.callback = partial(error_on_nth_request, 3)

    downloader = Downloader(progress=False)
    downloader.enqueue_file(multipartserver.url, path=tmp_path, max_splits=5)
    results = downloader.download()

    assert len(results) == 0
    assert len(results.errors) == 1
    assert isinstance(results.errors[0].exception, MultiPartDownloadError)

    # No partial or complete output file may be left behind.
    assert not (tmp_path / "testfile.txt").exists()
def test_multipart(multipartserver, tmp_path):
    """
    A split download must reassemble the content and use range requests.

    With ``max_splits=5`` the server is expected to see one initial request
    without ``HTTP_RANGE`` followed by five requests that carry it, and the
    downloaded file must contain 100 ``a`` bytes.
    """
    max_splits = 5
    dl = Downloader(progress=False)
    dl.enqueue_file(multipartserver.url, path=tmp_path, max_splits=max_splits)
    files = dl.download()

    # Verify we transferred all the content
    with open(files[0], "rb") as fobj:
        payload = fobj.read()
    assert payload == b"a" * 100

    # Assert that we made the expected number of requests
    assert len(multipartserver.requests) == max_splits + 1
    first_request, *range_requests = multipartserver.requests
    assert "HTTP_RANGE" not in first_request
    assert all("HTTP_RANGE" in req for req in range_requests)
def main():
    """
    Command-line entry point: download every URL given on the command line.

    Arguments are parsed from ``sys.argv[1:]`` with ``parse_args``. Each
    downloaded file is printed on its own line. If any download failed the
    process exits via ``sys.exit`` with one ``"<url> \\t <exception>"`` line
    per failure as the exit message.
    """
    args = parse_args(sys.argv[1:])
    downloader = Downloader(
        max_conn=args.max_conn,
        file_progress=not args.no_file_progress,
        overwrite=args.overwrite,
    )
    for url in args.urls:
        downloader.enqueue_file(url, path=args.directory)

    results = downloader.download()
    for downloaded in results:
        print(downloaded)

    failures = ''.join(
        f'{err.url} \t {err.exception}\n' for err in results.errors
    )
    if failures:
        sys.exit(failures)
def download_beacon(dtime):
    """
    Fetch the first available EUVI beacon image for the day of ``dtime``.

    A fixed list of candidate observation times is tried in order. The first
    download that completes without errors is moved to ``map_path(dtime)``
    and the function returns `None`.

    Raises
    ------
    RuntimeError
        If none of the candidate files could be downloaded.
    """
    datestr = dtime.strftime('%Y%m%d')
    candidate_times = (
        '000530',
        '001615',
        '005530',
        '010530',
        '011530',
        '043530',
    )
    for hhmmss in candidate_times:
        url = ('https://stereo-ssc.nascom.nasa.gov/pub/beacon/ahead/secchi/img'
               f'/euvi/{datestr}/{datestr}_{hhmmss}_n7euA.fts')
        print(url)

        # A fresh downloader per attempt, so a failed candidate is never
        # carried over into the next try.
        attempt = Downloader()
        attempt.enqueue_file(url, path=map_path(dtime).parent)
        files = attempt.download()

        if len(files.errors) == 0:
            pathlib.Path(files[0]).replace(map_path(dtime))
            return

    raise RuntimeError(f'No EUVI beacon map available for {dtime}')
def run(self):
    """
    Download every listed file that is missing or fails its MD5 check.

    Reads ``self.listfiles`` (a mapping whose values have ``"url"`` and
    ``"path"`` keys and optionally ``"hashmd5"``), ``self.outfolder`` and
    ``self.progress``. Any exception raised by the download call is logged
    rather than propagated. Returns `None`; if parfive is not importable an
    error is logged and nothing is downloaded.
    """
    try:
        from parfive import Downloader
    except ImportError:
        log.error(
            "The parfive package needs to be installed to download files with gammapy download"
        )
        return

    def already_valid(local_file, checksum):
        # True only when a checksum is listed, the file exists locally and
        # the local digest equals the listed one.
        if not checksum or not local_file.exists():
            return False
        return hashlib.md5(local_file.read_bytes()).hexdigest() == checksum

    if self.listfiles:
        log.info(f"Content will be downloaded in {self.outfolder}")

    queue = Downloader(progress=self.progress)
    for key in self.listfiles:
        record = self.listfiles[key]
        url = record["url"]
        local_file = self.outfolder / record["path"]
        checksum = record["hashmd5"] if "hashmd5" in record else ""
        if already_valid(local_file, checksum):
            continue
        queue.enqueue_file(url, path=str(local_file.parent))

    try:
        queue.download()
    except Exception as ex:
        log.error("Failed to download files.")
        log.error(ex)
def mas_helio():
    """
    Get some MAS heliospheric data files.

    These are taken from CR2210, which is used for PSP data comparisons in the
    documentation examples. The ``rho``, ``vr`` and ``br`` files are stored in
    a ``mas_helio`` sub-directory of ``download_dir``; files that already
    exist there are not downloaded again.

    Returns
    -------
    pathlib.Path
        Resolved path of the directory holding the files.
    """
    target_dir = download_dir / 'mas_helio'
    target_dir.mkdir(parents=True, exist_ok=True)
    url_template = 'http://www.predsci.com/data/runs/cr2210-medium/hmi_masp_mas_std_0201/helio/{var}002.hdf'

    # Create a downloader to queue the files to be downloaded
    queue = Downloader()
    for variable in ('rho', 'vr', 'br'):
        local_copy = target_dir / f'{variable}002.hdf'
        if not local_copy.exists():
            queue.enqueue_file(url_template.format(var=variable), path=target_dir)

    # Download the files
    if queue.queued_downloads > 0:
        queue.download()

    return target_dir.resolve()
def fetch(self, query_results: QueryResponseTable, *, path: os.PathLike = None,
          downloader: parfive.Downloader, **kwargs):
    """
    Queue the asdf file describing each dataset in ``query_results``.

    Nothing is downloaded here; one request per row is added to
    ``downloader`` and the function returns `None`.

    Parameters
    ----------
    query_results
        Results to download. Each row must provide a ``'Dataset ID'`` item.
    path : `str` or `pathlib.Path`, optional
        Path to the download directory. ``{file}`` is appended when the
        given path does not already contain it; when omitted the configured
        ``downloads/download_dir`` is used.
    downloader : `parfive.Downloader`
        The download manager to use.
    """
    # This logic is being upstreamed into Fido hopefully in 2.1rc4
    if path is None:
        target = Path(config.get('downloads', 'download_dir')) / '{file}'  # pragma: no cover
    else:
        lacks_placeholder = (isinstance(path, (str, os.PathLike))
                             and '{file}' not in str(path))
        target = Path(path)  # pragma: no cover
        if lacks_placeholder:
            target = target / '{file}'  # pragma: no cover
    target = target.expanduser()

    if len(query_results) == 0:
        return

    for row in query_results:
        dataset_id = row['Dataset ID']
        url = f"{self._BASE_DOWNLOAD_URL}/asdf?datasetId={dataset_id}"
        # The file name is resolved lazily by ``_make_filename``.
        name_resolver = partial(self._make_filename, target, row)
        # Set max_splits here as the metadata streamer doesn't like accept-range at the moment.
        downloader.enqueue_file(url, filename=name_resolver, max_splits=1)
def download(self, url, path):
    """
    Download ``url`` to the exact file location ``path``.

    Parameters
    ----------
    url : `str`
        Remote address of the file.
    path : `str` or `pathlib.Path`
        Destination file; its parent is used as the download directory and
        its final component as the file name.

    Raises
    ------
    DownloaderError
        If the download call raises, or if the downloader reports a failed
        transfer in its results.
    """
    path = Path(path)
    downloader = Downloader()
    downloader.enqueue_file(url, path.parent, path.name)

    try:
        results = downloader.download()
    except Exception as e:
        raise DownloaderError from e

    # parfive collects per-file failures on ``results.errors`` instead of
    # raising, so without this check a failed transfer went unnoticed.
    errors = getattr(results, "errors", None)
    if errors:
        cause = getattr(errors[0], "exception", None)
        if not isinstance(cause, BaseException):
            cause = None
        raise DownloaderError from cause
def getenvironment(self):
    """
    Download the conda environment file for ``self.release``.

    The file ``gammapy-<release>-environment.yml`` is requested from
    ``BASE_URL + "/install/"``. The parent directory of the target location
    under ``self.outfolder`` is created first. If anything in that step or
    the download raises, the exception is logged and the interpreter exits.
    If parfive is not importable an error is logged and `None` is returned.
    """
    try:
        from parfive import Downloader
    except ImportError:
        log.error("To use gammapy download, install the parfive package!")
        return

    env_name = "gammapy-" + self.release + "-environment.yml"
    env_url = BASE_URL + "/install/" + env_name
    env_target = str(self.outfolder / env_name)

    queue = Downloader(progress=False, file_progress=False)
    # NOTE(review): ``path`` receives the full target file path here, not its
    # directory -- confirm this is what parfive's ``enqueue_file`` expects.
    queue.enqueue_file(env_url, path=env_target)

    try:
        log.info(f"Downloading {env_url}")
        Path(env_target).parent.mkdir(parents=True, exist_ok=True)
        queue.download()
    except Exception as ex:
        log.error(ex)
        # NOTE(review): exits without an explicit status code.
        exit()
def main():
    """
    Download the COCO images of one dataset split that show a person.

    The split name comes from the ``dataset`` command-line argument. The
    annotation file ``../annotations/instances_<dataset>.json`` must exist.
    For each image in the ``person`` category, the image URL is queued when
    the first person annotation loaded for it has ``iscrowd == 0``. Files
    are downloaded into ``./<dataset>``, which is created if missing.
    """
    args = parser.parse_args()
    dataset = args.dataset

    # Load annotations
    annotation_file = f'../annotations/instances_{dataset}.json'
    assert os.path.isfile(annotation_file)

    output_dir = f'./{dataset}'
    try:
        os.mkdir(output_dir)
    except FileExistsError:
        pass

    # Init coco
    coco = COCO(annotation_file)
    person_category = coco.getCatIds(catNms=['person'])[0]
    image_ids = coco.getImgIds(catIds=person_category)

    print("Putting all urls into big list!")
    urls = []
    for image_id in image_ids:
        image = coco.loadImgs(image_id)[0]
        annotation_ids = coco.getAnnIds(imgIds=image["id"],
                                        catIds=person_category,
                                        iscrowd=None)
        # Only the first annotation decides whether the image is used.
        first_annotation = coco.loadAnns(annotation_ids)[0]
        if first_annotation["iscrowd"] == 0:
            urls.append(image["coco_url"])

    print(f"Enqueueing download of {len(urls)} items")
    queue = Downloader()
    for url in urls:
        queue.enqueue_file(url, path=output_dir)

    print("Downloading files...")
    queue.download()
def fetch(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Every distinct URL is downloaded once. When a result points at a ``.gz``
    archive, the per-day ``<YYYYMMDD>SRS.txt`` member is extracted from the
    matching archive next to the expected local file.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : optional
        Forwarded to ``self._get_full_filenames`` to build the local paths.
    error_callback : optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    Results Object
        The object returned by the downloader, with its ``data`` replaced by
        the list of final local file paths (as strings).
    """
    urls = [qrblock.url for qrblock in qres]

    filenames = []
    local_filenames = []
    for i, (url, qre) in enumerate(zip(urls, qres)):
        name = url.split('/')[-1]

        # temporary fix !!! coz All QRBs have same start_time values
        day = Time(qre.time.start.strftime('%Y-%m-%d')) + TimeDelta(i*u.day)

        if name not in filenames:
            filenames.append(name)

        if name.endswith('.gz'):
            local_filenames.append('{}SRS.txt'.format(day.strftime('%Y%m%d')))
        else:
            local_filenames.append(name)

    # Files to be actually downloaded
    paths = self._get_full_filenames(qres, filenames, path)

    # Those files that will be present after get returns
    local_paths = self._get_full_filenames(qres, local_filenames, path)

    # remove duplicate urls. This will make paths and urls to have same number of elements.
    # OrderedDict is required to maintain ordering because it will be zipped with paths later
    urls = list(OrderedDict.fromkeys(urls))

    dobj = Downloader(max_conn=5)
    for aurl, fname in zip(urls, paths):
        dobj.enqueue_file(aurl, filename=fname)

    paths = dobj.download()

    outfiles = []
    for fname, srs_filename in zip(local_paths, local_filenames):
        name = fname.name

        for downloaded in paths:
            archive = pathlib.Path(downloaded)

            # Plain text downloads are already in their final form.
            if archive.name.endswith('.txt'):
                continue

            year = archive.name.split('_SRS')[0]
            if year in name:
                # The context manager closes the archive even when the
                # member is missing or extraction fails.
                with tarfile.open(archive) as tar:
                    member = tar.getmember('SRS/' + srs_filename)
                    # Extract under the plain file name, not 'SRS/<name>'.
                    member.name = name
                    tar.extract(member, path=fname.parent)
                break

        # The file is reported whether or not it came out of an archive.
        outfiles.append(fname)

    paths.data = list(map(str, outfiles))
    return paths
def fetch(self, query_response, path=None, methods=None, site=None,
          progress=True, overwrite=False, downloader=None, wait=True):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.

    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file-name of the file downloaded can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".

    methods : {list of str}
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number of
        SUFFIXES i.e. `PREFIX-SUFFIX_SUFFIX2_SUFFIX3`.
        The full list of
        `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.

    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO  (aka CFA)  Smithonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================

    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.

    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.

    downloader : `parfive.Downloader`, optional
        The download manager to use. If given, ``progress`` and
        ``overwrite`` are ignored.

    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if `downloader` is not `None`.

    Returns
    -------
    out : `parfive.Results`
        Object that supplies a list of filenames and any errors.

    Examples
    --------
    >>> files = fetch(qr) # doctest:+SKIP
    """
    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'),
                            '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')
    path = os.path.expanduser(path)

    dl_set = True
    if not downloader:
        dl_set = False
        # Pass ``overwrite`` on; it was previously accepted and documented
        # but silently dropped.
        downloader = Downloader(progress=progress, overwrite=overwrite)

    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        # Nothing to request. Honour ``wait=False`` instead of running the
        # caller's downloader behind its back.
        return downloader.download() if wait else Results()

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")

    data_request = self.make_getdatarequest(query_response, methods, info)
    data_response = VSOGetDataResponse(self.api.service.GetData(data_request))

    err_results = self.download_all(data_response, methods, downloader, path, fileids)

    if dl_set and not wait:
        return err_results

    # Merge the request-stage results and errors into what the downloader
    # itself reports.
    results = downloader.download()
    results += err_results
    results._errors += err_results.errors
    return results
def get_request(self, requests, path=None, overwrite=False, progress=True,
                downloader=None, wait=True):
    """
    Query JSOC to see if the request(s) is ready for download.

    If the request is ready for download, it will then download it.

    Parameters
    ----------
    requests : `~drms.ExportRequest`, `str`, `list`
        `~drms.ExportRequest` objects or `str` request IDs or lists
        returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.

    path : `str`
        Path to save data to, defaults to SunPy download dir.

    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.

    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bar will be shown.

    downloader : `parfive.Downloader`, optional
        The download manager to use.

    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if `downloader` is not `None`.

    Returns
    -------
    res : `parfive.Results`
        The result of ``downloader.download()``, or an empty ``Results()``
        when a downloader was supplied and ``wait`` is `False`.

    Raises
    ------
    NotExportedError
        If any of the requests has not finished exporting yet.
    """
    c = drms.Client()

    # Convert Responses to a list if not already
    if isinstance(requests, str) or not isiterable(requests):
        requests = [requests]

    # Ensure all the requests are drms ExportRequest objects
    for i, request in enumerate(requests):
        if isinstance(request, str):
            r = c.export_from_id(request)
            requests[i] = r

    # We only download if all are finished
    if not all([r.has_succeeded() for r in requests]):
        raise NotExportedError("Can not download as not all the requests "
                               "have been exported for download yet.")

    # Ensure path has a {file} in it
    if path is None:
        default_dir = config.get("downloads", "download_dir")
        path = os.path.join(default_dir, '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')

    paths = []
    for request in requests:
        for filename in request.data['filename']:
            # Ensure we don't duplicate the file extension.
            # ``str.strip`` removes a *set of characters* from both ends, so
            # it could eat unrelated leading/trailing characters of the
            # template; slice the suffix off instead.
            ext = os.path.splitext(filename)[1]
            if ext and path.endswith(ext):
                fname = path[:-len(ext)]
            else:
                fname = path
            fname = fname.format(file=filename)
            fname = os.path.expanduser(fname)
            paths.append(fname)

    dl_set = True
    if not downloader:
        dl_set = False
        downloader = Downloader(progress=progress, overwrite=overwrite)

    urls = []
    for request in requests:
        if request.status == 0:
            for index, data in request.data.iterrows():
                url_dir = request.request_url + '/'
                urls.append(urllib.parse.urljoin(url_dir, data['filename']))

    if urls:
        if progress:
            print_message = "{0} URLs found for download. Full request totalling {1}MB"
            # NOTE(review): ``request`` is the last request of the loop
            # above, so the size shown is that of the final request only.
            print(print_message.format(len(urls), request._d['size']))
        for aurl, fname in zip(urls, paths):
            downloader.enqueue_file(aurl, filename=fname)

    if dl_set and not wait:
        return Results()

    results = downloader.download()
    return results
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse`
        Container returned by query method, or multiple.

    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in `UnifiedResponse.response_block_properties` via string formatting,
        moreover the file-name of the file downloaded can be referred to as file,
        e.g. "{source}/{instrument}/{time.start}/{file}".

    max_conn : `int`, optional
        The number of parallel download slots.

    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.

    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.

    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Raises
    ------
    ValueError
        If ``wait`` is passed as a keyword argument.
    TypeError
        If ``downloader`` is not a `parfive.Downloader`, if only some of the
        positional arguments are `parfive.Results`, or if a client returns
        something other than `None` or a `parfive.Results`.

    Examples
    --------
    >>> from sunpy.net.vso.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

    If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP

    """
    if "wait" in kwargs:
        raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
    elif not isinstance(downloader, Downloader):
        raise TypeError("The downloader argument must be a parfive.Downloader object.")

    # Handle retrying failed downloads
    is_retry = [isinstance(arg, Results) for arg in query_results]
    if all(is_retry):
        merged = Results()
        for previous in query_results:
            retried = downloader.retry(previous)
            merged.data += retried.data
            merged._errors += retried._errors
        return merged
    if any(is_retry):
        raise TypeError("If any arguments to fetch are "
                        "`parfive.Results` objects, all arguments must be.")

    # Each client only queues its files; the shared downloader runs once.
    client_results = [
        block.client.fetch(block, path=path, downloader=downloader,
                           wait=False, **kwargs)
        for query_result in query_results
        for block in query_result.responses
    ]

    results = downloader.download()

    # Combine the results objects from all the clients into one Results
    # object.
    for client_result in client_results:
        if client_result is None:
            continue
        if not isinstance(client_result, Results):
            raise TypeError(
                "If wait is False a client must return a parfive.Downloader and either None"
                " or a parfive.Results object.")
        results.data += client_result.data
        results._errors += client_result.errors

    return results
def euvi_pch_data_download(rootpath='', start_date='2007/05/01', end_date='2019/01/01'):
    """
    Crawl through and scrape the EUVI wavelet images.

    The directory listing at ``url_head`` is walked three levels deep. In
    each lowest-level directory the ``.fts.gz`` files are filtered by time
    and the selected ones are downloaded with a parfive `Downloader`.
    Progress messages are printed; nothing is returned.

    Parameters
    ----------
    rootpath : `str`, optional
        Directory under which the files are saved, mirroring the remote
        layout after ``fits/``. Defaults to the current working directory.
    start_date, end_date : `str`, optional
        Time range of images to keep, parsed with ``parse_time``.
    """
    url_head = 'http://sd-www.jhuapl.edu/secchi/wavelets/fits/'
    start_date = parse_time(start_date).to_datetime()
    end_date = parse_time(end_date).to_datetime()
    resp = request.urlopen(url_head)
    soup = BeautifulSoup(resp,
                         from_encoding=resp.info().get_param('charset'),
                         features="lxml")
    # Top-level sub-directories; their link text is parsed as '%Y%m/'.
    subs = [
        link.text for link in soup.find_all('a', href=True)
        if link.text.endswith('/')
    ]
    substime = [datetime.datetime.strptime(s, '%Y%m/') for s in subs]
    # Keep months from the one containing start_date (start_date shifted
    # back to day 1 of its month) up to end_date.
    gooddate = [
        s >= (start_date - datetime.timedelta(days=start_date.day - 1))
        and (s <= end_date) for s in substime
    ]
    url_subdir1 = [
        parse.urljoin(url_head, sub_dir) for sub_dir, gd in zip(subs, gooddate)
        if gd
    ]
    # One downloader instance is reused for every directory.
    dl = Downloader()
    if not rootpath:
        save_dir = os.path.abspath(os.path.curdir)
    else:
        save_dir = os.path.abspath(rootpath)
    # crawling until a full list has been generated
    for subdir1 in url_subdir1:
        resp = request.urlopen(subdir1)
        soup = BeautifulSoup(resp,
                             from_encoding=resp.info().get_param('charset'),
                             features="lxml")
        subs = [
            link.text for link in soup.find_all('a', href=True)
            if link.text.endswith('/')
        ]
        url_subdir2 = [parse.urljoin(subdir1, sub_dir) for sub_dir in subs]
        for subdir2 in url_subdir2:
            resp = request.urlopen(subdir2)
            soup = BeautifulSoup(
                resp,
                from_encoding=resp.info().get_param('charset'),
                features="lxml")
            subs = [
                link.text for link in soup.find_all('a', href=True)
                if link.text.endswith('/')
            ]
            url_subdir3 = [parse.urljoin(subdir2, sub_dir) for sub_dir in subs]
            for subdir3 in url_subdir3:
                subs = []
                resp = request.urlopen(subdir3)
                soup = BeautifulSoup(
                    resp,
                    from_encoding=resp.info().get_param('charset'),
                    features="lxml")
                # At the lowest level the links of interest are the
                # compressed FITS files themselves.
                subs = [
                    link.text for link in soup.find_all('a', href=True)
                    if link.text.endswith('.fts.gz')
                ]
                if len(subs) > 1:
                    image_url = [
                        parse.urljoin(subdir3, sub_dir) for sub_dir in subs
                    ]
                    # Earlier variant, which also appended the file name:
                    # save_path = [os.path.join(save_dir, subdir3.split('fits/')[1], path) for path in subs]
                    # Every file of this directory gets the same destination
                    # directory.
                    save_path = [
                        os.path.join(save_dir,
                                     subdir3.split('fits/')[1])
                        for path in subs
                    ]
                    image_times = [
                        datetime_from_euvi_filename(path) for path in subs
                    ]
                    # grab every 4 hours
                    # (14400 s; True where the offset from the first image is
                    # an exact multiple of that)
                    dt = list(
                        np.logical_not([
                            np.mod((time - image_times[0]).seconds, 14400)
                            for time in image_times
                        ]))
                    if np.sum(dt) < 6:
                        # Fewer than 6 matches: recompute relative to the
                        # second image.
                        dt2 = list(
                            np.logical_not([
                                np.mod((time - image_times[1]).seconds,
                                       14400) for time in image_times
                            ]))
                        # NOTE(review): dt and dt2 both have one entry per
                        # image, so this length comparison is never true and
                        # dt2 is never used -- comparing the number of
                        # matches may have been intended; confirm.
                        if len(dt2) > len(dt):
                            dt = dt2
                    st = [tt >= start_date for tt in image_times]
                    et = [tt <= end_date for tt in image_times]
                    # An image is kept when it is on the 4-hour grid and
                    # inside the requested time range.
                    goodness = [(aa and bb and cc)
                                for aa, bb, cc in zip(dt, st, et)]
                    if np.sum(goodness):
                        os.makedirs(os.path.join(save_dir,
                                                 subdir3.split('fits/')[1]),
                                    exist_ok=True)
                        # download each image
                        for good_image, image_loc, image_destination in zip(
                                goodness, image_url, save_path):
                            if good_image:
                                dl.enqueue_file(image_loc,
                                                path=image_destination)
                        files = dl.download()
                        # NOTE(review): files[0] assumes at least one
                        # download succeeded -- confirm.
                        print('Downloaded EUVI ' +
                              wavelength_from_euvi_filename(files[0]) +
                              'images for ' +
                              image_times[0].strftime('%Y-%m-%d'))
                else:
                    print('Too few images detected in: ', subdir3)
def test_enable_aiofiles_env_overwrite_always_enabled(use_aiofiles):
    """
    ``use_aiofiles`` must end up `True` whatever value was requested.

    NOTE(review): presumably an environment override forcing aiofiles on is
    applied by a decorator or fixture that is not visible in this block --
    confirm.
    """
    session_config = parfive.SessionConfig(use_aiofiles=use_aiofiles)
    downloader = Downloader(config=session_config)
    assert downloader.config.use_aiofiles is True
def fetch(self, *query_results, path=None, max_conn=5, progress=True,
          overwrite=False, downloader=None, **kwargs):
    """
    Download the records represented by
    `~sunpy.net.fido_factory.UnifiedResponse` objects.

    Parameters
    ----------
    query_results : `sunpy.net.fido_factory.UnifiedResponse`
        Container returned by query method, or multiple.

    path : `str`
        The directory to retrieve the files into. Can refer to any fields
        in `UnifiedResponse.response_block_properties` via string formatting,
        moreover the file-name of the file downloaded can be referred to as file,
        e.g. "{source}/{instrument}/{time.start}/{file}".

    max_conn : `int`, optional
        The number of parallel download slots.

    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.

    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.

    downloader : `parfive.Downloader`, optional
        The download manager to use. If specified the ``max_conn``,
        ``progress`` and ``overwrite`` arguments are ignored.

    Returns
    -------
    `parfive.Results`

    Raises
    ------
    PermissionError
        If ``path`` is given and its closest existing parent directory is
        not writable.
    ValueError
        If ``wait`` is passed as a keyword argument.
    TypeError
        If ``downloader`` is not a `parfive.Downloader`, if only some of the
        positional arguments are `parfive.Results`, or if a client returns
        something other than `None` or a `parfive.Results`.

    Examples
    --------
    >>> from sunpy.net.attrs import Time, Instrument
    >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
    >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

    If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

    >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP

    """
    if path is not None:
        # ``parents`` runs from the closest ancestor outwards, so the first
        # existing entry is where new directories would have to be created.
        existing_parents = [p for p in Path(path).parents if p.exists()]
        closest = existing_parents[0]
        if not os.access(closest, os.W_OK):
            raise PermissionError('You do not have permission to write'
                                  f' to the directory {closest}.')

    if "wait" in kwargs:
        raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

    if downloader is None:
        downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
    elif not isinstance(downloader, Downloader):
        raise TypeError("The downloader argument must be a parfive.Downloader object.")

    # Handle retrying failed downloads
    retry_flags = [isinstance(arg, Results) for arg in query_results]
    if all(retry_flags):
        combined = Results()
        for earlier in query_results:
            second_try = downloader.retry(earlier)
            combined.data += second_try.data
            combined._errors += second_try._errors
        return combined
    if any(retry_flags):
        raise TypeError("If any arguments to fetch are "
                        "`parfive.Results` objects, all arguments must be.")

    # Clients only enqueue here (wait=False); downloading happens once below.
    per_client = []
    for query_result in query_results:
        per_client.extend(
            block.client.fetch(block, path=path, downloader=downloader,
                               wait=False, **kwargs)
            for block in query_result.responses
        )

    results = downloader.download()

    # Combine the results objects from all the clients into one Results
    # object.
    for outcome in per_client:
        if outcome is None:
            continue
        if not isinstance(outcome, Results):
            raise TypeError(
                "If wait is False a client must return a parfive.Downloader and either None"
                " or a parfive.Results object.")
        results.data += outcome.data
        results._errors += outcome.errors

    return results
def fetch(self, query_response, path=None, methods=None, site=None,
          progress=True, overwrite=False, downloader=None, wait=True):
    """
    Download data specified in the query_response.

    Parameters
    ----------
    query_response : sunpy.net.vso.QueryResponse
        QueryResponse containing the items to be downloaded.

    path : str
        Specify where the data is to be downloaded. Can refer to arbitrary
        fields of the QueryResponseItem (instrument, source, time, ...) via
        string formatting, moreover the file-name of the file downloaded can
        be referred to as file, e.g.
        "{source}/{instrument}/{time.start}/{file}".

    methods : {list of str}
        Download methods, defaults to URL-FILE_Rice then URL-FILE.
        Methods are a concatenation of one PREFIX followed by any number of
        SUFFIXES i.e. `PREFIX-SUFFIX_SUFFIX2_SUFFIX3`.
        The full list of
        `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
        and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
        are listed on the VSO site.

    site : str
        There are a number of caching mirrors for SDO and other
        instruments, some available ones are listed below.

        =============== ========================================================
        NSO             National Solar Observatory, Tucson (US)
        SAO  (aka CFA)  Smithonian Astronomical Observatory, Harvard U. (US)
        SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
        ROB             Royal Observatory of Belgium (Belgium)
        MPS             Max Planck Institute for Solar System Research (Germany)
        UCLan           University of Central Lancashire (UK)
        IAS             Institut Aeronautique et Spatial (France)
        KIS             Kiepenheuer-Institut fur Sonnenphysik (Germany)
        NMSU            New Mexico State University (US)
        =============== ========================================================

    progress : `bool`, optional
        If `True` show a progress bar showing how many of the total files
        have been downloaded. If `False`, no progress bars will be shown at all.

    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.

    downloader : `parfive.Downloader`, optional
        The download manager to use. If given, ``progress`` and
        ``overwrite`` are ignored.

    wait : `bool`, optional
        If `False` ``downloader.download()`` will not be called. Only has
        any effect if `downloader` is not `None`.

    Returns
    -------
    out : `parfive.Results`
        Object that supplies a list of filenames and any errors.

    Examples
    --------
    >>> files = fetch(qr) # doctest:+SKIP
    """
    if path is None:
        path = os.path.join(config.get('downloads', 'download_dir'),
                            '{file}')
    elif isinstance(path, str) and '{file}' not in path:
        path = os.path.join(path, '{file}')
    path = os.path.expanduser(path)

    dl_set = True
    if not downloader:
        dl_set = False
        # Pass ``overwrite`` on; it was previously accepted and documented
        # but silently dropped.
        downloader = Downloader(progress=progress, overwrite=overwrite)

    fileids = VSOClient.by_fileid(query_response)
    if not fileids:
        # Nothing to request; only run the downloader if asked to wait.
        return downloader.download() if wait else Results()

    # Adding the site parameter to the info
    info = {}
    if site is not None:
        info['site'] = site

    VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")

    data_request = self.make_getdatarequest(query_response, methods, info)
    data_response = VSOGetDataResponse(
        self.api.service.GetData(data_request))

    err_results = self.download_all(data_response, methods, downloader,
                                    path, fileids)

    if dl_set and not wait:
        return err_results

    # Merge the request-stage results and errors into what the downloader
    # itself reports.
    results = downloader.download()
    results += err_results
    results._errors += err_results.errors
    return results
def fetch(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Every distinct URL is downloaded once. When a result points at a ``.gz``
    archive, the per-day ``<YYYYMMDD>SRS.txt`` member is extracted from the
    matching archive next to the expected local file.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : optional
        Forwarded to ``self._get_full_filenames`` to build the local paths.
    error_callback : optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    Results Object
        The object returned by the downloader, with its ``data`` replaced by
        the list of final local file paths (as strings).
    """
    urls = [qrblock.url for qrblock in qres]

    filenames = []
    local_filenames = []
    for i, (url, qre) in enumerate(zip(urls, qres)):
        name = url.split('/')[-1]

        # temporary fix !!! coz All QRBs have same start_time values
        day = Time(qre.time.start.strftime('%Y-%m-%d')) + TimeDelta(
            i * u.day)

        if name not in filenames:
            filenames.append(name)

        if name.endswith('.gz'):
            local_filenames.append('{}SRS.txt'.format(
                day.strftime('%Y%m%d')))
        else:
            local_filenames.append(name)

    # Files to be actually downloaded
    paths = self._get_full_filenames(qres, filenames, path)

    # Those files that will be present after get returns
    local_paths = self._get_full_filenames(qres, local_filenames, path)

    # remove duplicate urls. This will make paths and urls to have same number of elements.
    # OrderedDict is required to maintain ordering because it will be zipped with paths later
    urls = list(OrderedDict.fromkeys(urls))

    dobj = Downloader(max_conn=5)
    for aurl, fname in zip(urls, paths):
        dobj.enqueue_file(aurl, filename=fname)

    paths = dobj.download()

    outfiles = []
    for fname, srs_filename in zip(local_paths, local_filenames):
        name = fname.name

        for downloaded in paths:
            archive = pathlib.Path(downloaded)

            # Plain text downloads are already in their final form.
            if archive.name.endswith('.txt'):
                continue

            year = archive.name.split('_SRS')[0]
            if year in name:
                # Using the archive as a context manager guarantees it is
                # closed even if the member lookup or extraction raises.
                with tarfile.open(archive) as tar:
                    member = tar.getmember('SRS/' + srs_filename)
                    # Extract under the plain file name, not 'SRS/<name>'.
                    member.name = name
                    tar.extract(member, path=fname.parent)
                break

        # The file is reported whether or not it came out of an archive.
        outfiles.append(fname)

    paths.data = list(map(str, outfiles))
    return paths
def test_enable_aiofiles_constructor(use_aiofiles):
    """
    The ``use_aiofiles`` value given to `parfive.SessionConfig` must be the
    value visible on the downloader's config afterwards.
    """
    session_config = parfive.SessionConfig(use_aiofiles=use_aiofiles)
    dl = Downloader(config=session_config)

    assert dl.config.use_aiofiles == use_aiofiles, (
        f"expected={use_aiofiles}, got={dl.config.use_aiofiles}"
    )