Exemple #1
0
def download_sample_data(overwrite=False):
    """
    Download all sample data at once. This will overwrite any existing files.

    Parameters
    ----------
    overwrite: `bool`
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option
    sampledata_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if sampledata_dir:
        sampledata_dir = Path(sampledata_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    dl = Downloader(overwrite=overwrite)

    first_url = _base_urls[0]

    already_downloaded = []
    for file_name in _sample_files.keys():
        url = urljoin(first_url, file_name)
        fname = sampledata_dir / file_name
        # We have to avoid calling download if we already have all the files.
        if fname.exists() and not overwrite:
            already_downloaded.append(fname)
        else:
            dl.enqueue_file(url, filename=fname)

    if dl.queued_downloads:
        results = dl.download()
    else:
        return already_downloaded

    if not results.errors:
        return results
    else:
        log.info(
            'Failed to download one or more sample data files, retrying with a mirror.'
        )

    for retry_url in _base_urls[1:]:
        for i, err in enumerate(results.errors):
            file_name = err.filepath_partial().name
            log.debug(
                f"Failed to download {_sample_files[file_name]} from {err.url}: {err.exception}"
            )
            # Overwrite the parfive error to change the url to a mirror
            new_url = urljoin(retry_url, file_name)
            results._errors[i] = _error(err.filepath_partial, new_url,
                                        err.exception)

        results = dl.retry(results)

        if not results.errors:
            return results

    for err in results.errors:
        file_name = err.filepath_partial().name
        log.debug(
            f"Failed to download {_sample_files[file_name]} from {err.url}: {err.exception}"
        )
        log.error(
            f"Failed to download {_sample_files[file_name]} from all mirrors, the file will not be available."
        )

    return results
Exemple #2
0
    def fetch(self, *query_results, path=None, max_conn=5, progress=True,
              overwrite=False, downloader=None, **kwargs):
        """
        Download the records represented by `~sunpy.net.base_client.QueryResponseTable` or
        `~sunpy.net.fido_factory.UnifiedResponse` objects.

        Parameters
        ----------
        query_results : `sunpy.net.fido_factory.UnifiedResponse` or `~sunpy.net.base_client.QueryResponseTable`
            Container returned by query method, or multiple.
        path : `str`
            The directory to retrieve the files into. Can refer to any fields
            in `~sunpy.net.base_client.BaseQueryResponse.response_block_properties` via string formatting,
            moreover the file-name of the file downloaded can be referred to as file,
            e.g. "{source}/{instrument}/{time.start}/{file}".
        max_conn : `int`, optional
            The number of parallel download slots.
        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bars will be shown at all.
        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if ``'unique'`` the filename
            will be modified to be unique.
        downloader : `parfive.Downloader`, optional
            The download manager to use. If specified the ``max_conn``,
            ``progress`` and ``overwrite`` arguments are ignored.

        Returns
        -------
        `parfive.Results`

        Examples
        --------
        >>> from sunpy.net.attrs import Time, Instrument
        >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
        >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

        If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

        >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP

        """
        if path is None:
            path = Path(config.get('downloads', 'download_dir')) / '{file}'
        elif isinstance(path, (str, os.PathLike)) and '{file}' not in str(path):
            path = Path(path) / '{file}'
        else:
            path = Path(path)
        path = path.expanduser()

        # Ensure we have write permissions to the path
        exists = list(filter(lambda p: p.exists(), Path(path).resolve().parents))
        if not os.access(exists[0], os.W_OK):
            raise PermissionError('You do not have permission to write'
                                  f' to the directory {exists[0]}.')

        if "wait" in kwargs:
            raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

        # TODO: Remove when parfive allows us to special case URLS.
        # Avoid more than one connection for JSOC only requests.
        from sunpy.net.jsoc import JSOCClient

        is_jsoc_only = False
        for query_result in query_results:
            if isinstance(query_result, UnifiedResponse):
                is_jsoc_only = all([isinstance(result.client, JSOCClient) for result in query_result])
            elif isinstance(query_result, QueryResponseTable):
                is_jsoc_only = all([isinstance(result.table.client, JSOCClient) for result in query_result])
        if downloader is None:
            if is_jsoc_only:
                max_conn = 1
                kwargs['max_splits'] = 1
            downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
        elif not isinstance(downloader, parfive.Downloader):
            raise TypeError("The downloader argument must be a parfive.Downloader instance.")

        # Handle retrying failed downloads
        retries = [isinstance(arg, Results) for arg in query_results]
        if all(retries):
            results = Results()
            for retry in query_results:
                dr = downloader.retry(retry)
                results.data += dr.data
                results._errors += dr._errors
            return results
        elif any(retries):
            raise TypeError("If any arguments to fetch are `parfive.Results` objects, all arguments must be.")

        reslist = []
        for query_result in query_results:
            if isinstance(query_result, QueryResponseRow):
                responses = [query_result.as_table()]
            elif isinstance(query_result, QueryResponseTable):
                responses = [query_result]
            elif isinstance(query_result, UnifiedResponse):
                responses = query_result
            else:
                raise ValueError(f"Query result has an unrecognized type: {type(query_result)} "
                                 "Allowed types are QueryResponseRow, QueryResponseTable or UnifiedResponse.")
            for block in responses:
                result = block.client.fetch(block, path=path,
                                            downloader=downloader,
                                            wait=False, **kwargs)
                if result not in (NotImplemented, None):
                    reslist.append(result)

        results = downloader.download()
        # Combine the results objects from all the clients into one Results
        # object.
        for result in reslist:
            if not isinstance(result, Results):
                raise TypeError(
                    "If wait is False a client must return a parfive.Downloader and either None"
                    " or a parfive.Results object.")
            results.data += result.data
            results._errors += result.errors

        return results
Exemple #3
0
    def fetch(self,
              *query_results,
              path=None,
              max_conn=5,
              progress=True,
              overwrite=False,
              downloader=None,
              **kwargs):
        """
        Download the records represented by
        `~sunpy.net.fido_factory.UnifiedResponse` objects.

        Parameters
        ----------
        query_results : `sunpy.net.fido_factory.UnifiedResponse`
            Container returned by query method, or multiple.
        path : `str`
            The directory to retrieve the files into. Can refer to any fields
            in `UnifiedResponse.response_block_properties` via string formatting,
            moreover the file-name of the file downloaded can be referred to as file,
            e.g. "{source}/{instrument}/{time.start}/{file}".
        max_conn : `int`, optional
            The number of parallel download slots.
        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bars will be shown at all.
        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.
        downloader : `parfive.Downloader`, optional
            The download manager to use. If specified the ``max_conn``,
            ``progress`` and ``overwrite`` arguments are ignored.

        Returns
        -------
        `parfive.Results`

        Examples
        --------
        >>> from sunpy.net.attrs import Time, Instrument
        >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
        >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

        If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

        >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP

        """
        if path is not None:
            exists = list(
                filter(lambda p: p.exists(),
                       Path(path).resolve().parents))

            if not os.access(exists[0], os.W_OK):
                raise PermissionError('You do not have permission to write'
                                      f' to the directory {exists[0]}.')

        if "wait" in kwargs:
            raise ValueError(
                "wait is not a valid keyword argument to Fido.fetch.")

        if downloader is None:
            downloader = Downloader(max_conn=max_conn,
                                    progress=progress,
                                    overwrite=overwrite)
        elif not isinstance(downloader, parfive.Downloader):
            raise TypeError(
                "The downloader argument must be a parfive.Downloader object.")

        # Handle retrying failed downloads
        retries = [isinstance(arg, Results) for arg in query_results]
        if all(retries):
            results = Results()
            for retry in query_results:
                dr = downloader.retry(retry)
                results.data += dr.data
                results._errors += dr._errors
            return results
        elif any(retries):
            raise TypeError(
                "If any arguments to fetch are "
                "`parfive.Results` objects, all arguments must be.")

        reslist = []
        for query_result in query_results:
            for block in query_result.responses:
                reslist.append(
                    block.client.fetch(block,
                                       path=path,
                                       downloader=downloader,
                                       wait=False,
                                       **kwargs))

        results = downloader.download()
        # Combine the results objects from all the clients into one Results
        # object.
        for result in reslist:
            if result is None:
                continue
            if not isinstance(result, Results):
                raise TypeError(
                    "If wait is False a client must return a parfive.Downloader and either None"
                    " or a parfive.Results object.")
            results.data += result.data
            results._errors += result.errors

        return results