Example #1
0
    def run(self):
        try:
            from parfive import Downloader
        except ImportError:
            log.error(
                "The parfive package needs to be installed to download files with gammapy download"
            )
            return

        if self.listfiles:
            log.info("Content will be downloaded in {}".format(self.outfolder))

        dl = Downloader(progress=self.progress)
        for rec in self.listfiles:
            url = self.listfiles[rec]["url"]
            path = self.outfolder / self.listfiles[rec]["path"]
            md5 = ""
            if "hashmd5" in self.listfiles[rec]:
                md5 = self.listfiles[rec]["hashmd5"]
            retrieve = True
            if md5 and path.exists():
                md5local = hashlib.md5(path.read_bytes()).hexdigest()
                if md5local == md5:
                    retrieve = False
            if retrieve:
                dl.enqueue_file(url, path=str(path.parent))

        try:
            dl.download()
        except Exception as ex:
            log.error("Failed to download files.")
            log.error(ex)
Example #2
0
def mas_helio():
    """
    Get some MAS heliospheric data files. These are taken from CR2210, which
    is used for PSP data comparisons in the documentation examples.
    """
    mas_helio_dir = download_dir / 'mas_helio'
    mas_helio_dir.mkdir(parents=True, exist_ok=True)
    base_url = 'http://www.predsci.com/data/runs/cr2210-medium/hmi_masp_mas_std_0201/helio/{var}002.hdf'

    # Create a downloader to queue the files to be downloaded
    dl = Downloader()

    vars = ['rho', 'vr', 'br']
    for var in vars:
        file = mas_helio_dir / f'{var}002.hdf'
        if file.exists():
            continue
        else:
            remote_file = base_url.format(var=var)
            dl.enqueue_file(remote_file, path=mas_helio_dir)

    # Download the files
    if dl.queued_downloads > 0:
        dl.download()
    return mas_helio_dir.resolve()
Example #3
0
 def download(self, url, path):
     downloader = Downloader()
     path = Path(path)
     filename = path.name
     directory = path.parent
     downloader.enqueue_file(url, directory, filename)
     try:
         downloader.download()
     except Exception as e:
         raise DownloaderError from e
Example #4
0
    def run(self):
        try:
            from parfive import Downloader
        except ImportError:
            log.error("To use gammapy download, install the parfive package!")
            return

        if self.listfiles:
            log.info(f"Content will be downloaded in {self.outfolder}")

        dl = Downloader(progress=self.progress, file_progress=False)
        for rec in self.listfiles:
            url = self.listfiles[rec]["url"]
            path = self.outfolder / self.listfiles[rec]["path"]
            md5 = ""
            if "hashmd5" in self.listfiles[rec]:
                md5 = self.listfiles[rec]["hashmd5"]
            retrieve = True
            if md5 and path.exists():
                md5local = hashlib.md5(path.read_bytes()).hexdigest()
                if md5local == md5:
                    retrieve = False
            if retrieve:
                dl.enqueue_file(url, path=str(path.parent))

        log.info(f"{dl.queued_downloads} files to download.")
        res = dl.download()
        log.info(f"{len(res)} files downloaded.")
        for err in res.errors:
            _, _, exception = err
            log.error(f"Error: {exception}")
Example #5
0
    def fetch(self, qres, path=None, overwrite=False,
              progress=True, downloader=None, wait=True):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        path : `str` or `pathlib.Path`, optional
            Path to the download directory, or file template including the
            ``{file}`` string which will be replaced with the filename.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bar will be shown.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------

        results: `parfive.Results`

        """
        if path is not None:
            path = Path(path)

        urls = [qrblock.url for qrblock in qres.blocks]

        filenames = [url.split('/')[-1] for url in urls]

        paths = self._get_full_filenames(qres, filenames, path)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress, overwrite=overwrite)

        for url, filename in zip(urls, paths):
            downloader.enqueue_file(url, filename=filename)

        if dl_set and not wait:
            return

        return downloader.download()
Example #6
0
    def fetch(self, qres, path=None, overwrite=False,
              progress=True, downloader=None, wait=True):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        path : `str` or `pathlib.Path`, optional
            Path to the download directory, or file template including the
            ``{file}`` string which will be replaced with the filename.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bar will be shown.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------

        results: `parfive.Results`

        """
        if path is not None:
            path = Path(path)

        urls = [qrblock.url for qrblock in qres]

        filenames = [url.split('/')[-1] for url in urls]

        paths = self._get_full_filenames(qres, filenames, path)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress, overwrite=overwrite)

        for url, filename in zip(urls, paths):
            downloader.enqueue_file(url, filename=filename)

        if dl_set and not wait:
            return

        return downloader.download()
def download_files_parfive(list_of_files, pathy="/Users/laurahayes/ionospheric_work/ionospheric-analysis/vlf_codes/vlf_bas_files/"):
    dl = Downloader()
    for f in list_of_files:
        filename = f.split('/')[-1]
        dl.enqueue_file(f, path=pathy)


    files = dl.download()
Example #8
0
    def getenvironment(self):
        try:
            from parfive import Downloader
        except ImportError:
            log.error("To use gammapy download, install the parfive package!")
            return

        dl = Downloader(progress=False, file_progress=False)
        filename_env = "gammapy-" + self.release + "-environment.yml"
        url_file_env = BASE_URL + "/install/" + filename_env
        filepath_env = str(self.outfolder / filename_env)
        dl.enqueue_file(url_file_env, path=filepath_env)
        try:
            log.info(f"Downloading {url_file_env}")
            Path(filepath_env).parent.mkdir(parents=True, exist_ok=True)
            dl.download()
        except Exception as ex:
            log.error(ex)
            exit()
Example #9
0
def test_multipart_with_error(multipartserver, tmp_path):
    multipartserver.callback = partial(error_on_nth_request, 3)
    dl = Downloader(progress=False)
    max_splits = 5
    dl.enqueue_file(multipartserver.url, path=tmp_path, max_splits=max_splits)
    files = dl.download()

    assert len(files) == 0
    assert len(files.errors) == 1
    assert isinstance(files.errors[0].exception, MultiPartDownloadError)

    expected_file = tmp_path / "testfile.txt"
    assert not expected_file.exists()
Example #10
0
def main():
    args = parser.parse_args()
    dataset = args.dataset

    # Load annotations
    annFile = '../annotations/instances_{}.json'.format(dataset)
    assert os.path.isfile(annFile)
    cocoPath = './{}'.format(dataset)
    try:
        os.mkdir(cocoPath)
    except FileExistsError:
        pass

    # Init coco
    coco = COCO(annFile)
    personCatID = coco.getCatIds(catNms=['person'])[0]
    cocoImageIds = coco.getImgIds(catIds=personCatID)

    print("Putting all urls into big list!")
    urls = []
    for i in cocoImageIds:
        cocoImg = coco.loadImgs(i)[0]
        annIds = coco.getAnnIds(imgIds=cocoImg["id"],
                                catIds=personCatID,
                                iscrowd=None)
        annotation = coco.loadAnns(annIds)[0]
        if annotation["iscrowd"] == 0:
            urls.append(cocoImg["coco_url"])

    print("Enqueueing download of {} items".format(len(urls)))
    dl = Downloader()

    for url in urls:
        dl.enqueue_file(url, path=cocoPath)

    print("Downloading files...")
    dl.download()
Example #11
0
def test_multipart(multipartserver, tmp_path):
    dl = Downloader(progress=False)
    max_splits = 5
    dl.enqueue_file(multipartserver.url, path=tmp_path, max_splits=max_splits)
    files = dl.download()

    # Verify we transferred all the content
    with open(files[0], "rb") as fobj:
        assert fobj.read() == b"a" * 100

    # Assert that we made the expected number of requests
    assert len(multipartserver.requests) == max_splits + 1
    assert "HTTP_RANGE" not in multipartserver.requests[0]
    for split_req in multipartserver.requests[1:]:
        assert "HTTP_RANGE" in split_req
Example #12
0
def main():
    args = parse_args(sys.argv[1:])
    downloader = Downloader(max_conn=args.max_conn,
                            file_progress=not args.no_file_progress,
                            overwrite=args.overwrite)
    for url in args.urls:
        downloader.enqueue_file(url, path=args.directory)
    results = downloader.download()
    for i in results:
        print(i)

    err_str = ''
    for err in results.errors:
        err_str += f'{err.url} \t {err.exception}\n'
    if err_str:
        sys.exit(err_str)
Example #13
0
def download_beacon(dtime):
    datestr = dtime.strftime('%Y%m%d')
    for time in [
            '000530',
            '001615',
            '005530',
            '010530',
            '011530',
            '043530',
    ]:
        url = ('https://stereo-ssc.nascom.nasa.gov/pub/beacon/ahead/secchi/img'
               f'/euvi/{datestr}/{datestr}_{time}_n7euA.fts')
        print(url)
        dl = Downloader()
        dl.enqueue_file(url, path=map_path(dtime).parent)
        files = dl.download()
        if len(files.errors):
            continue
        pathlib.Path(files[0]).replace(map_path(dtime))
        return
    raise RuntimeError(f'No EUVI beacon map available for {dtime}')
Example #14
0
File: vso.py Project: Cadair/sunpy
    def fetch(self, query_response, path=None, methods=None, site=None,
              progress=True, overwrite=False, downloader=None, wait=True):
        """
        Download data specified in the query_response.

        Parameters
        ----------
        query_response : sunpy.net.vso.QueryResponse
            QueryResponse containing the items to be downloaded.

        path : str
            Specify where the data is to be downloaded. Can refer to arbitrary
            fields of the QueryResponseItem (instrument, source, time, ...) via
            string formatting, moreover the file-name of the file downloaded can
            be referred to as file, e.g.
            "{source}/{instrument}/{time.start}/{file}".

        methods : {list of str}
            Download methods, defaults to URL-FILE_Rice then URL-FILE.
            Methods are a concatenation of one PREFIX followed by any number of
            SUFFIXES i.e. `PREFIX-SUFFIX_SUFFIX2_SUFFIX3`.
            The full list of
            `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
            and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
            are listed on the VSO site.

        site : str
            There are a number of caching mirrors for SDO and other
            instruments, some available ones are listed below.

            =============== ========================================================
            NSO             National Solar Observatory, Tucson (US)
            SAO  (aka CFA)  Smithonian Astronomical Observatory, Harvard U. (US)
            SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
            ROB             Royal Observatory of Belgium (Belgium)
            MPS             Max Planck Institute for Solar System Research (Germany)
            UCLan           University of Central Lancashire (UK)
            IAS             Institut Aeronautique et Spatial (France)
            KIS             Kiepenheuer-Institut fur Sonnenphysik Germany)
            NMSU            New Mexico State University (US)
            =============== ========================================================

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bars will be shown at all.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------
        out : `parfive.Results`
            Object that supplies a list of filenames and any errors.

        Examples
        --------
        >>> files = fetch(qr) # doctest:+SKIP
        """
        if path is None:
            path = os.path.join(config.get('downloads', 'download_dir'),
                                '{file}')
        elif isinstance(path, str) and '{file}' not in path:
            path = os.path.join(path, '{file}')
        path = os.path.expanduser(path)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress)

        fileids = VSOClient.by_fileid(query_response)
        if not fileids:
            return downloader.download()
        # Adding the site parameter to the info
        info = {}
        if site is not None:
            info['site'] = site

        VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")

        data_request = self.make_getdatarequest(query_response, methods, info)
        data_response = VSOGetDataResponse(self.api.service.GetData(data_request))

        err_results = self.download_all(data_response, methods, downloader, path, fileids)

        if dl_set and not wait:
            return err_results

        results = downloader.download()
        results += err_results
        results._errors += err_results.errors
        return results
Example #15
0
    def fetch(self, *query_results, path=None, max_conn=5, progress=True,
              overwrite=False, downloader=None, **kwargs):
        """
        Download the records represented by
        `~sunpy.net.fido_factory.UnifiedResponse` objects.

        Parameters
        ----------
        query_results : `sunpy.net.fido_factory.UnifiedResponse`
            Container returned by query method, or multiple.

        path : `str`
            The directory to retrieve the files into. Can refer to any fields
            in `UnifiedResponse.response_block_properties` via string formatting,
            moreover the file-name of the file downloaded can be referred to as file,
            e.g. "{source}/{instrument}/{time.start}/{file}".

        max_conn : `int`, optional
            The number of parallel download slots.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bars will be shown at all.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use. If specified the ``max_conn``,
            ``progress`` and ``overwrite`` arguments are ignored.

        Returns
        -------
        `parfive.Results`

        Examples
        --------
        >>> from sunpy.net.attrs import Time, Instrument
        >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
        >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

        If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

        >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP

        """
        if path is not None:
            exists = list(filter(lambda p: p.exists(), Path(path).parents))
            if not os.access(exists[0], os.W_OK):
                raise PermissionError('You do not have permission to write'
                                      f' to the directory {exists[0]}.')

        if "wait" in kwargs:
            raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

        if downloader is None:
            downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
        elif not isinstance(downloader, Downloader):
            raise TypeError("The downloader argument must be a parfive.Downloader object.")

        # Handle retrying failed downloads
        retries = [isinstance(arg, Results) for arg in query_results]
        if all(retries):
            results = Results()
            for retry in query_results:
                dr = downloader.retry(retry)
                results.data += dr.data
                results._errors += dr._errors
            return results
        elif any(retries):
            raise TypeError("If any arguments to fetch are "
                            "`parfive.Results` objects, all arguments must be.")

        reslist = []
        for query_result in query_results:
            for block in query_result.responses:
                reslist.append(block.client.fetch(block, path=path,
                                                  downloader=downloader,
                                                  wait=False, **kwargs))

        results = downloader.download()
        # Combine the results objects from all the clients into one Results
        # object.
        for result in reslist:
            if result is None:
                continue
            if not isinstance(result, Results):
                raise TypeError(
                    "If wait is False a client must return a parfive.Downloader and either None"
                    " or a parfive.Results object.")
            results.data += result.data
            results._errors += result.errors

        return results
Example #16
0
    def fetch(self, *query_results, path=None, max_conn=5, progress=True,
              overwrite=False, downloader=None, **kwargs):
        """
        Download the records represented by
        `~sunpy.net.fido_factory.UnifiedResponse` objects.

        Parameters
        ----------
        query_results : `sunpy.net.fido_factory.UnifiedResponse`
            Container returned by query method, or multiple.

        path : `str`
            The directory to retrieve the files into. Can refer to any fields
            in `UnifiedResponse.response_block_properties` via string formatting,
            moreover the file-name of the file downloaded can be referred to as file,
            e.g. "{source}/{instrument}/{time.start}/{file}".

        max_conn : `int`, optional
            The number of parallel download slots.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bars will be shown at all.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use. If specified the ``max_conn``,
            ``progress`` and ``overwrite`` arguments are ignored.

        Returns
        -------
        `parfive.Results`

        Examples
        --------
        >>> from sunpy.net.vso.attrs import Time, Instrument
        >>> unifresp = Fido.search(Time('2012/3/4','2012/3/5'), Instrument('EIT'))  # doctest: +REMOTE_DATA
        >>> filepaths = Fido.fetch(unifresp)  # doctest: +SKIP

        If any downloads fail, they can be retried by passing the `parfive.Results` object back into ``fetch``.

        >>> filepaths = Fido.fetch(filepaths)  # doctest: +SKIP

        """

        if "wait" in kwargs:
            raise ValueError("wait is not a valid keyword argument to Fido.fetch.")

        if downloader is None:
            downloader = Downloader(max_conn=max_conn, progress=progress, overwrite=overwrite)
        elif not isinstance(downloader, Downloader):
            raise TypeError("The downloader argument must be a parfive.Downloader object.")

        # Handle retrying failed downloads
        retries = [isinstance(arg, Results) for arg in query_results]
        if all(retries):
            results = Results()
            for retry in query_results:
                dr = downloader.retry(retry)
                results.data += dr.data
                results._errors += dr._errors
            return results
        elif any(retries):
            raise TypeError("If any arguments to fetch are "
                            "`parfive.Results` objects, all arguments must be.")

        reslist = []
        for query_result in query_results:
            for block in query_result.responses:
                reslist.append(block.client.fetch(block, path=path,
                                                  downloader=downloader,
                                                  wait=False, **kwargs))

        results = downloader.download()
        # Combine the results objects from all the clients into one Results
        # object.
        for result in reslist:
            if result is None:
                continue
            if not isinstance(result, Results):
                raise TypeError(
                    "If wait is False a client must return a parfive.Downloader and either None"
                    " or a parfive.Results object.")
            results.data += result.data
            results._errors += result.errors

        return results
Example #17
0
File: jsoc.py Project: Cadair/sunpy
    def get_request(self, requests, path=None, overwrite=False, progress=True,
                    downloader=None, wait=True):
        """
        Query JSOC to see if the request(s) is ready for download.

        If the request is ready for download, it will then download it.

        Parameters
        ----------
        requests : `~drms.ExportRequest`, `str`, `list`
            `~drms.ExportRequest` objects or `str` request IDs or lists
            returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.

        path : `str`
            Path to save data to, defaults to SunPy download dir.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bar will be shown.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------
        res: `~sunpy.net.download.Results`
            A `~sunpy.net.download.Results` instance or `None` if no URLs to download

        """
        c = drms.Client()

        # Convert Responses to a list if not already
        if isinstance(requests, str) or not isiterable(requests):
            requests = [requests]

        # Ensure all the requests are drms ExportRequest objects
        for i, request in enumerate(requests):
            if isinstance(request, str):
                r = c.export_from_id(request)
                requests[i] = r

        # We only download if all are finished
        if not all([r.has_succeeded() for r in requests]):
            raise NotExportedError("Can not download as not all the requests "
                                   "have been exported for download yet.")

        # Ensure path has a {file} in it
        if path is None:
            default_dir = config.get("downloads", "download_dir")
            path = os.path.join(default_dir, '{file}')
        elif isinstance(path, str) and '{file}' not in path:
            path = os.path.join(path, '{file}')

        paths = []
        for request in requests:
            for filename in request.data['filename']:
                # Ensure we don't duplicate the file extension
                ext = os.path.splitext(filename)[1]
                if path.endswith(ext):
                    fname = path.strip(ext)
                else:
                    fname = path
                fname = fname.format(file=filename)
                fname = os.path.expanduser(fname)
                paths.append(fname)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress, overwrite=overwrite)

        urls = []
        for request in requests:
            if request.status == 0:
                for index, data in request.data.iterrows():
                    url_dir = request.request_url + '/'
                    urls.append(urllib.parse.urljoin(url_dir, data['filename']))
        if urls:
            if progress:
                print_message = "{0} URLs found for download. Full request totalling {1}MB"
                print(print_message.format(len(urls), request._d['size']))
            for aurl, fname in zip(urls, paths):
                downloader.enqueue_file(aurl, filename=fname)

        if dl_set and not wait:
            return Results()

        results = downloader.download()
        return results
Example #18
0
def euvi_pch_data_download(rootpath='',
                           start_date='2007/05/01',
                           end_date='2019/01/01'):
    # Crawl through and scrape the EUVI wavelet images

    url_head = 'http://sd-www.jhuapl.edu/secchi/wavelets/fits/'
    start_date = parse_time(start_date).to_datetime()
    end_date = parse_time(end_date).to_datetime()

    resp = request.urlopen(url_head)
    soup = BeautifulSoup(resp,
                         from_encoding=resp.info().get_param('charset'),
                         features="lxml")
    subs = [
        link.text for link in soup.find_all('a', href=True)
        if link.text.endswith('/')
    ]

    substime = [datetime.datetime.strptime(s, '%Y%m/') for s in subs]
    gooddate = [
        s >= (start_date - datetime.timedelta(days=start_date.day - 1))
        and (s <= end_date) for s in substime
    ]

    url_subdir1 = [
        parse.urljoin(url_head, sub_dir)
        for sub_dir, gd in zip(subs, gooddate) if gd
    ]

    dl = Downloader()

    if not rootpath:
        save_dir = os.path.abspath(os.path.curdir)
    else:
        save_dir = os.path.abspath(rootpath)

    #  crawling until a full list has been generated
    for subdir1 in url_subdir1:
        resp = request.urlopen(subdir1)
        soup = BeautifulSoup(resp,
                             from_encoding=resp.info().get_param('charset'),
                             features="lxml")
        subs = [
            link.text for link in soup.find_all('a', href=True)
            if link.text.endswith('/')
        ]

        url_subdir2 = [parse.urljoin(subdir1, sub_dir) for sub_dir in subs]

        for subdir2 in url_subdir2:
            resp = request.urlopen(subdir2)
            soup = BeautifulSoup(
                resp,
                from_encoding=resp.info().get_param('charset'),
                features="lxml")
            subs = [
                link.text for link in soup.find_all('a', href=True)
                if link.text.endswith('/')
            ]

            url_subdir3 = [parse.urljoin(subdir2, sub_dir) for sub_dir in subs]

            for subdir3 in url_subdir3:
                subs = []
                resp = request.urlopen(subdir3)
                soup = BeautifulSoup(
                    resp,
                    from_encoding=resp.info().get_param('charset'),
                    features="lxml")
                subs = [
                    link.text for link in soup.find_all('a', href=True)
                    if link.text.endswith('.fts.gz')
                ]

                if len(subs) > 1:
                    image_url = [
                        parse.urljoin(subdir3, sub_dir) for sub_dir in subs
                    ]
                    # save_path = [os.path.join(save_dir, subdir3.split('fits/')[1], path) for path in subs]
                    save_path = [
                        os.path.join(save_dir,
                                     subdir3.split('fits/')[1])
                        for path in subs
                    ]

                    image_times = [
                        datetime_from_euvi_filename(path) for path in subs
                    ]

                    # grab every 4 hours
                    dt = list(
                        np.logical_not([
                            np.mod((time - image_times[0]).seconds, 14400)
                            for time in image_times
                        ]))

                    if np.sum(dt) < 6:
                        dt2 = list(
                            np.logical_not([
                                np.mod((time - image_times[1]).seconds, 14400)
                                for time in image_times
                            ]))
                        if len(dt2) > len(dt):
                            dt = dt2

                    st = [tt >= start_date for tt in image_times]
                    et = [tt <= end_date for tt in image_times]
                    goodness = [(aa and bb and cc)
                                for aa, bb, cc in zip(dt, st, et)]

                    if np.sum(goodness):
                        os.makedirs(os.path.join(save_dir,
                                                 subdir3.split('fits/')[1]),
                                    exist_ok=True)

                    # download each image
                    for good_image, image_loc, image_destination in zip(
                            goodness, image_url, save_path):
                        if good_image:
                            dl.enqueue_file(image_loc, path=image_destination)
                            files = dl.download()

                    print('Downloaded EUVI ' +
                          wavelength_from_euvi_filename(files[0]) +
                          'images for ' + image_times[0].strftime('%Y-%m-%d'))
                else:
                    print('Too few images detected in: ', subdir3)
Example #19
0
    def fetch(self,
              query_response,
              path=None,
              methods=None,
              site=None,
              progress=True,
              overwrite=False,
              downloader=None,
              wait=True):
        """
        Download data specified in the query_response.

        Parameters
        ----------
        query_response : sunpy.net.vso.QueryResponse
            QueryResponse containing the items to be downloaded.

        path : str
            Specify where the data is to be downloaded. Can refer to arbitrary
            fields of the QueryResponseItem (instrument, source, time, ...) via
            string formatting, moreover the file-name of the file downloaded can
            be referred to as file, e.g.
            "{source}/{instrument}/{time.start}/{file}".

        methods : {list of str}
            Download methods, defaults to URL-FILE_Rice then URL-FILE.
            Methods are a concatenation of one PREFIX followed by any number of
            SUFFIXES i.e. `PREFIX-SUFFIX_SUFFIX2_SUFFIX3`.
            The full list of
            `PREFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_PREFIX>`_
            and `SUFFIXES <https://sdac.virtualsolar.org/cgi/show_details?keyword=METHOD_SUFFIX>`_
            are listed on the VSO site.

        site : str
            There are a number of caching mirrors for SDO and other
            instruments, some available ones are listed below.

            =============== ========================================================
            NSO             National Solar Observatory, Tucson (US)
            SAO  (aka CFA)  Smithonian Astronomical Observatory, Harvard U. (US)
            SDAC (aka GSFC) Solar Data Analysis Center, NASA/GSFC (US)
            ROB             Royal Observatory of Belgium (Belgium)
            MPS             Max Planck Institute for Solar System Research (Germany)
            UCLan           University of Central Lancashire (UK)
            IAS             Institut Aeronautique et Spatial (France)
            KIS             Kiepenheuer-Institut fur Sonnenphysik Germany)
            NMSU            New Mexico State University (US)
            =============== ========================================================

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bars will be shown at all.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------
        out : `parfive.Results`
            Object that supplies a list of filenames and any errors.

        Examples
        --------
        >>> files = fetch(qr) # doctest:+SKIP
        """
        if path is None:
            path = os.path.join(config.get('downloads', 'download_dir'),
                                '{file}')
        elif isinstance(path, str) and '{file}' not in path:
            path = os.path.join(path, '{file}')
        path = os.path.expanduser(path)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress)

        fileids = VSOClient.by_fileid(query_response)
        if not fileids:
            return downloader.download() if wait else Results()
        # Adding the site parameter to the info
        info = {}
        if site is not None:
            info['site'] = site

        VSOGetDataResponse = self.api.get_type("VSO:VSOGetDataResponse")

        data_request = self.make_getdatarequest(query_response, methods, info)
        data_response = VSOGetDataResponse(
            self.api.service.GetData(data_request))

        err_results = self.download_all(data_response, methods, downloader,
                                        path, fileids)

        if dl_set and not wait:
            return err_results

        results = downloader.download()
        results += err_results
        results._errors += err_results.errors
        return results
Example #20
0
    def fetch(self, qres, path=None, error_callback=None, **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        Returns
        -------
        Results Object
        """

        urls = [qrblock.url for qrblock in qres]

        filenames = []
        local_filenames = []

        for i, [url, qre] in enumerate(zip(urls, qres)):
            name = url.split('/')[-1]

            # temporary fix !!! coz All QRBs have same start_time values
            day = Time(qre.time.start.strftime('%Y-%m-%d')) + TimeDelta(
                i * u.day)

            if name not in filenames:
                filenames.append(name)

            if name.endswith('.gz'):
                local_filenames.append('{}SRS.txt'.format(
                    day.strftime('%Y%m%d')))
            else:
                local_filenames.append(name)

        # Files to be actually downloaded
        paths = self._get_full_filenames(qres, filenames, path)

        # Those files that will be present after get returns
        local_paths = self._get_full_filenames(qres, local_filenames, path)

        # remove duplicate urls. This will make paths and urls to have same number of elements.
        # OrderedDict is required to maintain ordering because it will be zipped with paths later
        urls = list(OrderedDict.fromkeys(urls))

        dobj = Downloader(max_conn=5)

        for aurl, fname in zip(urls, paths):
            dobj.enqueue_file(aurl, filename=fname)

        paths = dobj.download()

        outfiles = []
        for fname, srs_filename in zip(local_paths, local_filenames):

            name = fname.name

            past_year = False
            for i, fname2 in enumerate(paths):
                fname2 = pathlib.Path(fname2)

                if fname2.name.endswith('.txt'):
                    continue

                year = fname2.name.split('_SRS')[0]

                if year in name:
                    TarFile = tarfile.open(fname2)
                    filepath = fname.parent
                    member = TarFile.getmember('SRS/' + srs_filename)
                    member.name = name
                    TarFile.extract(member, path=filepath)
                    TarFile.close()

                    outfiles.append(fname)

                    past_year = True
                    break

            if past_year is False:
                outfiles.append(fname)

        paths.data = list(map(str, outfiles))
        return paths
Example #21
0
    def get_request(self,
                    requests,
                    path=None,
                    overwrite=False,
                    progress=True,
                    downloader=None,
                    wait=True):
        """
        Query JSOC to see if the request(s) is ready for download.

        If the request is ready for download, it will then download it.

        Parameters
        ----------
        requests : `~drms.ExportRequest`, `str`, `list`
            `~drms.ExportRequest` objects or `str` request IDs or lists
            returned by `~sunpy.net.jsoc.jsoc.JSOCClient.request_data`.

        path : `str`
            Path to save data to, defaults to SunPy download dir.

        progress : `bool`, optional
            If `True` show a progress bar showing how many of the total files
            have been downloaded. If `False`, no progress bar will be shown.

        overwrite : `bool` or `str`, optional
            Determine how to handle downloading if a file already exists with the
            same name. If `False` the file download will be skipped and the path
            returned to the existing file, if `True` the file will be downloaded
            and the existing file will be overwritten, if `'unique'` the filename
            will be modified to be unique.

        downloader : `parfive.Downloader`, optional
            The download manager to use.

        wait : `bool`, optional
           If `False` ``downloader.download()`` will not be called. Only has
           any effect if `downloader` is not `None`.

        Returns
        -------
        res: `~sunpy.net.download.Results`
            A `~sunpy.net.download.Results` instance or `None` if no URLs to download

        """
        c = drms.Client()

        # Convert Responses to a list if not already
        if isinstance(requests, str) or not isiterable(requests):
            requests = [requests]

        # Ensure all the requests are drms ExportRequest objects
        for i, request in enumerate(requests):
            if isinstance(request, str):
                r = c.export_from_id(request)
                requests[i] = r

        # We only download if all are finished
        if not all([r.has_succeeded() for r in requests]):
            raise NotExportedError("Can not download as not all the requests "
                                   "have been exported for download yet.")

        # Ensure path has a {file} in it
        if path is None:
            default_dir = config.get("downloads", "download_dir")
            path = os.path.join(default_dir, '{file}')
        elif isinstance(path, str) and '{file}' not in path:
            path = os.path.join(path, '{file}')

        paths = []
        for request in requests:
            for filename in request.data['filename']:
                # Ensure we don't duplicate the file extension
                ext = os.path.splitext(filename)[1]
                if path.endswith(ext):
                    fname = path.strip(ext)
                else:
                    fname = path
                fname = fname.format(file=filename)
                fname = os.path.expanduser(fname)
                paths.append(fname)

        dl_set = True
        if not downloader:
            dl_set = False
            downloader = Downloader(progress=progress, overwrite=overwrite)

        urls = []
        for request in requests:
            if request.status == 0:
                for index, data in request.data.iterrows():
                    url_dir = request.request_url + '/'
                    urls.append(urllib.parse.urljoin(url_dir,
                                                     data['filename']))
        if urls:
            if progress:
                print_message = "{0} URLs found for download. Full request totalling {1}MB"
                print(print_message.format(len(urls), request._d['size']))
            for aurl, fname in zip(urls, paths):
                downloader.enqueue_file(aurl, filename=fname)

        if dl_set and not wait:
            return Results()

        results = downloader.download()
        return results
Example #22
0
File: noaa.py Project: Cadair/sunpy
    def fetch(self, qres, path=None, error_callback=None, **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        Returns
        -------
        Results Object
        """

        urls = [qrblock.url for qrblock in qres]

        filenames = []
        local_filenames = []

        for i, [url, qre] in enumerate(zip(urls, qres)):
            name = url.split('/')[-1]

            # temporary fix !!! coz All QRBs have same start_time values
            day = Time(qre.time.start.strftime('%Y-%m-%d')) + TimeDelta(i*u.day)

            if name not in filenames:
                filenames.append(name)

            if name.endswith('.gz'):
                local_filenames.append('{}SRS.txt'.format(day.strftime('%Y%m%d')))
            else:
                local_filenames.append(name)

        # Files to be actually downloaded
        paths = self._get_full_filenames(qres, filenames, path)

        # Those files that will be present after get returns
        local_paths = self._get_full_filenames(qres, local_filenames, path)

        # remove duplicate urls. This will make paths and urls to have same number of elements.
        # OrderedDict is required to maintain ordering because it will be zipped with paths later
        urls = list(OrderedDict.fromkeys(urls))

        dobj = Downloader(max_conn=5)

        for aurl, fname in zip(urls, paths):
            dobj.enqueue_file(aurl, filename=fname)

        paths = dobj.download()

        outfiles = []
        for fname, srs_filename in zip(local_paths, local_filenames):

            name = fname.name

            past_year = False
            for i, fname2 in enumerate(paths):
                fname2 = pathlib.Path(fname2)

                if fname2.name.endswith('.txt'):
                    continue

                year = fname2.name.split('_SRS')[0]

                if year in name:
                    TarFile = tarfile.open(fname2)
                    filepath = fname.parent
                    member = TarFile.getmember('SRS/' + srs_filename)
                    member.name = name
                    TarFile.extract(member, path=filepath)
                    TarFile.close()

                    outfiles.append(fname)

                    past_year = True
                    break

            if past_year is False:
                outfiles.append(fname)

        paths.data = list(map(str, outfiles))
        return paths