Example No. 1
    def get_request(self, requests, path=None, overwrite=False, progress=True,
                    downloader=None, wait=True, max_conn=default_max_conn, **kwargs):
        """
        Query JSOC to see if the request(s) is ready for download.

        If the request is ready for download, it will then download it.

        Parameters
        ----------
        requests : `~drms.client.ExportRequest`, `str`, `list`
            `~drms.client.ExportRequest` objects or `str` request IDs or lists
            returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.

        path : `str`
            Path to save data to, defaults to SunPy download dir.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bar will be shown.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if ``'unique'`` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------
        res : `~sunpy.net.download.Results`
            A `~sunpy.net.download.Results` instance, or `None` if there are no URLs to download.

        """
        c = drms.Client()

        kwargs['max_splits'] = kwargs.get('max_splits', 2)

        # Convert Responses to a list if not already
        if isinstance(requests, str) or not isiterable(requests):
            requests = [requests]

        # Ensure all the requests are drms ExportRequest objects
        for i, request in enumerate(requests):
            if isinstance(request, str):
                r = c.export_from_id(request)
                requests[i] = r

        # We only download if all are finished
        if not all([r.has_succeeded() for r in requests]):
            raise NotExportedError("Can not download as not all the requests "
                                   "have been exported for download yet.")

        # Ensure path has a {file} in it
        if path is None:
            default_dir = config.get("downloads", "download_dir")
            path = os.path.join(default_dir, '{file}')
        elif isinstance(path, Path):
            path = str(path)

        if isinstance(path, str) and '{file}' not in path:
            path = os.path.join(path, '{file}')

        paths = []
        for request in requests:
            if request.method == 'url-tar':
                fname = path.format(file=Path(request.tarfile).name)
                paths.append(os.path.expanduser(fname))
            else:
                for filename in request.data['filename']:
                    # Ensure we don't duplicate the file extension
                    ext = os.path.splitext(filename)[1]
                    if ext and path.endswith(ext):
                        # Remove the duplicated suffix; str.strip() would strip
                        # individual characters rather than the extension.
                        fname = path[:-len(ext)]
                    else:
                        fname = path
                    fname = fname.format(file=filename)
                    fname = os.path.expanduser(fname)
                    paths.append(fname)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress, overwrite=overwrite, max_conn=max_conn)

        if downloader.max_conn * kwargs['max_splits'] > 10:
            warnings.warn(("JSOC does not support more than 10 parallel connections. " +
                           f"Changing the number of parallel connections to {2 * self.default_max_conn}."),
                          SunpyUserWarning)
            kwargs['max_splits'] = 2
            downloader.max_conn = self.default_max_conn

        urls = []
        for request in requests:
            if request.status == 0:
                if request.protocol == 'as-is' or request.method == 'url-tar':
                    urls.extend(list(request.urls.url))
                else:
                    for index, data in request.data.iterrows():
                        url_dir = request.request_url + '/'
                        urls.append(urllib.parse.urljoin(url_dir, data['filename']))

        if urls:
            if progress:
                print_message = "{0} URLs found for download. Full request totalling {1}MB"
                print(print_message.format(len(urls), request._d['size']))
            for aurl, fname in zip(urls, paths):
                downloader.enqueue_file(aurl, filename=fname, **kwargs)

        if dl_set and not wait:
            return Results()

        results = downloader.download()
        return results
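
A minimal usage sketch for the method above, assuming a JSOC-registered notify email; the series, time range, and email below are placeholders, and the two-stage request_data / get_request flow follows the client code shown.

from sunpy.net import attrs as a
from sunpy.net.jsoc import JSOCClient

client = JSOCClient()
response = client.search(a.Time('2014-01-01T00:00:00', '2014-01-01T00:10:00'),
                         a.jsoc.Series('hmi.v_45s'),
                         a.jsoc.Notify('user@example.com'))  # placeholder email
requests = client.request_data(response)   # submit the export request to JSOC
# get_request raises NotExportedError until every request has finished exporting
files = client.get_request(requests, path='~/sunpy/data')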
Example No. 2
    def fetch(self, qres, path=None, error_callback=None, **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        Returns
        -------
        `parfive.Results`
            The downloaded files, with tarball members extracted where applicable.
        """

        urls = [qrblock['url'] for qrblock in qres]

        filenames = []
        local_filenames = []

        for i, [url, qre] in enumerate(zip(urls, qres)):
            name = url.split('/')[-1]

            # Temporary fix: all QueryResponseBlocks share the same start_time,
            # so offset each file by its index in days.
            day = Time(qre['Time'].start.strftime('%Y-%m-%d')) + TimeDelta(
                i * u.day)

            if name not in filenames:
                filenames.append(name)

            if name.endswith('.gz'):
                local_filenames.append('{}SRS.txt'.format(
                    day.strftime('%Y%m%d')))
            else:
                local_filenames.append(name)

        # Files to be actually downloaded
        paths = self._get_full_filenames(qres, filenames, path)

        # Those files that will be present after get returns
        local_paths = self._get_full_filenames(qres, local_filenames, path)

        # Remove duplicate URLs so that paths and urls have the same number of elements.
        # OrderedDict is required to maintain ordering because it will be zipped with paths later.
        urls = list(OrderedDict.fromkeys(urls))

        dobj = Downloader(max_conn=5)

        for aurl, fname in zip(urls, paths):
            dobj.enqueue_file(aurl, filename=fname)

        paths = dobj.download()

        outfiles = []
        for fname, srs_filename in zip(local_paths, local_filenames):

            name = fname.name

            past_year = False
            for i, fname2 in enumerate(paths):
                fname2 = pathlib.Path(fname2)

                if fname2.name.endswith('.txt'):
                    continue

                year = fname2.name.split('_SRS')[0]

                if year in name:
                    TarFile = tarfile.open(fname2)
                    filepath = fname.parent
                    member = TarFile.getmember('SRS/' + srs_filename)
                    member.name = name
                    TarFile.extract(member, path=filepath)
                    TarFile.close()

                    outfiles.append(fname)

                    past_year = True
                    break

            if past_year is False:
                outfiles.append(fname)

        paths.data = list(map(str, outfiles))
        return paths
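
In practice this fetch method is usually driven through Fido rather than called directly; a hedged sketch of that path follows (the instrument attribute name for the SRS client is an assumption and may vary between sunpy versions).

from sunpy.net import Fido, attrs as a

result = Fido.search(a.Time('2016/01/01', '2016/01/02'),
                     a.Instrument('SRS-Table'))   # assumed registered name
files = Fido.fetch(result, path='./srs/')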
Example No. 3
    def fetch(self, qres, path=None, error_callback=None, **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        Returns
        -------
        `parfive.Results`
            The downloaded files, with tarball members extracted where applicable.
        """
        urls = [qrblock['url'] for qrblock in qres]
        filenames = []
        local_filenames = []
        for url, qre in zip(urls, qres):
            name = url.split('/')[-1]
            day = qre['Start Time']
            if name not in filenames:
                filenames.append(name)
            if name.endswith('.gz'):
                local_filenames.append('{}SRS.txt'.format(
                    day.strftime('%Y%m%d')))
            else:
                local_filenames.append(name)

        if path is not None:
            path = pathlib.Path(path)
        # Files to be actually downloaded
        paths = self._get_full_filenames(qres, filenames, path)

        # Those files that will be present after get returns
        local_paths = self._get_full_filenames(qres, local_filenames, path)

        # Remove duplicate URLs so that paths and urls have the same number of elements.
        # OrderedDict is required to maintain ordering because it will be zipped with paths later.
        urls = list(OrderedDict.fromkeys(urls))

        downloader = Downloader(max_conn=2)
        for aurl, fname in zip(urls, paths):
            downloader.enqueue_file(aurl, filename=fname)

        paths = downloader.download()

        outfiles = []
        for fname, srs_filename in zip(local_paths, local_filenames):
            name = fname.name
            past_year = False
            for fname2 in paths:
                fname2 = pathlib.Path(fname2)
                if fname2.name.endswith('.txt'):
                    continue
                year = fname2.name.split('_SRS')[0]
                if year in name:
                    with tarfile.open(fname2) as open_tar:
                        filepath = fname.parent
                        try:
                            member = open_tar.getmember('SRS/' + srs_filename)
                        except KeyError:
                            # Some tars have a {year}_SRS when extracted, 2010 being one example
                            member = open_tar.getmember(f'{year}_SRS/' +
                                                        srs_filename)
                        member.name = name
                        open_tar.extract(member, path=filepath)
                    outfiles.append(fname)
                    past_year = True
                    break

            if past_year is False:
                outfiles.append(fname)

        paths.data = list(map(str, outfiles))
        return paths
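
The KeyError fallback is the substantive change from the previous example; a self-contained sketch of that lookup pattern, with hypothetical argument names, looks like this:

import tarfile

def extract_srs_member(tar_path, srs_filename, year, dest, out_name):
    """Extract one SRS text file, trying both known archive layouts."""
    with tarfile.open(tar_path) as open_tar:
        try:
            member = open_tar.getmember('SRS/' + srs_filename)
        except KeyError:
            # Some yearly archives nest members under e.g. "2010_SRS/" instead
            member = open_tar.getmember(f'{year}_SRS/' + srs_filename)
        member.name = out_name
        open_tar.extract(member, path=dest)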
Example No. 4
def download_sample_data(overwrite=False):
    """
    Download all sample data at once. Existing files are only overwritten if
    ``overwrite`` is `True`.

    Parameters
    ----------
    overwrite : `bool`, optional
        Overwrite existing sample data.
    """
    # Workaround for tox only. This is not supported as a user option
    sampledata_dir = os.environ.get("SUNPY_SAMPLEDIR", False)
    if sampledata_dir:
        sampledata_dir = Path(sampledata_dir).expanduser().resolve()
        _is_writable_dir(sampledata_dir)
    else:
        # Creating the directory for sample files to be downloaded
        sampledata_dir = Path(get_and_create_sample_dir())

    dl = Downloader(overwrite=overwrite)

    first_url = _base_urls[0]

    already_downloaded = []
    for file_name in _sample_files.keys():
        url = urljoin(first_url, file_name)
        fname = sampledata_dir / file_name
        # We have to avoid calling download if we already have all the files.
        if fname.exists() and not overwrite:
            already_downloaded.append(fname)
        else:
            dl.enqueue_file(url, filename=fname)

    if dl.queued_downloads:
        results = dl.download()
    else:
        return already_downloaded

    if not results.errors:
        return results
    else:
        log.info(
            'Failed to download one or more sample data files, retrying with a mirror.'
        )

    for retry_url in _base_urls[1:]:
        for i, err in enumerate(results.errors):
            file_name = err.filepath_partial().name
            log.debug(
                f"Failed to download {_sample_files[file_name]} from {err.url}: {err.exception}"
            )
            # Overwrite the parfive error to change the url to a mirror
            new_url = urljoin(retry_url, file_name)
            results._errors[i] = _error(err.filepath_partial, new_url,
                                        err.exception)

        results = dl.retry(results)

        if not results.errors:
            return results

    for err in results.errors:
        file_name = err.filepath_partial().name
        log.debug(
            f"Failed to download {_sample_files[file_name]} from {err.url}: {err.exception}"
        )
        log.error(
            f"Failed to download {_sample_files[file_name]} from all mirrors, the file will not be available."
        )

    return results
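
This helper is normally reached through the public sunpy.data.download_sample_data function (assumed here to re-export the routine above); a minimal call looks like:

from sunpy.data import download_sample_data

# Files already on disk are skipped; pass overwrite=True to force a re-download
results = download_sample_data(overwrite=False)
print(results)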
Example No. 5
    def fetch(self,
              qres,
              path=None,
              overwrite=False,
              progress=True,
              downloader=None,
              wait=True,
              **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.
        path : `str` or `pathlib.Path`, optional
            Path to the download directory, or file template including the
            ``{file}`` string which will be replaced with the filename.
        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if ``'unique'`` the filename
            will be modified to be unique.
        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bar will be shown.
        downloader : `parfive.Downloader`, optional
            The download manager to use.
        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if ``downloader`` is not `None`.
        **kwargs : dict, optional
            Passed to `parfive.Downloader.enqueue_file`.

        Returns
        -------
        results : `parfive.Results`

        """
        if path is not None:
            path = Path(path)

        if isinstance(qres, QueryResponseRow):
            qres = qres.as_table()

        urls = []
        if len(qres):
            urls = list(qres['url'])

        filenames = [url.split('/')[-1] for url in urls]

        paths = self._get_full_filenames(qres, filenames, path)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress, overwrite=overwrite)

        for url, filename in zip(urls, paths):
            downloader.enqueue_file(url, filename=filename, **kwargs)

        if dl_set and not wait:
            return

        return downloader.download()
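
A hedged sketch of the external-downloader branch: supplying a parfive.Downloader with wait=False only queues the files, so several queries can share a single download call (client and qres are assumed to come from an earlier search).

from parfive import Downloader

dl = Downloader(progress=True, overwrite=False)
client.fetch(qres, path='./data/{file}', downloader=dl, wait=False)   # queue only
results = dl.download()   # download everything that was queued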