def test_is_url_in_cache():
    from astropy.utils.data import download_file, is_url_in_cache

    assert not is_url_in_cache('http://astropy.org/nonexistentfile')

    download_file(TESTURL, cache=True, show_progress=False)
    assert is_url_in_cache(TESTURL)
def get_iers_up_to_date(mjd=Time.now().mjd - 45.0):
    """
    Update the IERS B table to include MJD (defaults to 45 days ago) and open
    IERS_Auto.
    """
    # First clear the IERS_Auto table
    IERS_Auto.iers_table = None

    if mjd > Time.now().mjd:
        raise ValueError("IERS B data requested for future MJD {}".format(mjd))

    might_be_old = is_url_in_cache(IERS_B_URL)
    iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if might_be_old and iers_b[-1]["MJD"].to_value(u.d) < mjd:
        # Try wiping the download and re-downloading
        log.info("IERS B Table appears to be old. Attempting to re-download.")
        clear_download_cache(IERS_B_URL)
        iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if iers_b[-1]["MJD"].to_value(u.d) < mjd:
        log.warning("IERS B data not yet available for MJD {}".format(mjd))

    # Now open IERS_Auto with no argument, so it should use the IERS_B that
    # we just made sure was up to date
    iers_auto = IERS_Auto.open()

    if astropy.version.major >= 4:
        # Tell astropy to use this table for all future transformations
        earth_orientation_table.set(iers_auto)
def get_iers_b_up_to_date(mjd):
    """Update the IERS B table to include MJD if necessary."""
    if Time.now().mjd <= mjd:
        raise ValueError("IERS B data requested for future MJD {}".format(mjd))

    might_be_old = is_url_in_cache(IERS_B_URL)
    iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if might_be_old and iers_b[-1]["MJD"].to_value(u.d) < mjd:
        # Try wiping the download and re-downloading
        clear_download_cache(IERS_B_URL)
        iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if iers_b[-1]["MJD"].to_value(u.d) < mjd:
        raise ValueError(
            "IERS B data not yet available for MJD {}".format(mjd))

    return iers_b
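# A minimal usage sketch (added here, not from the original source), assuming the
# names the function above relies on are already in scope: Time from astropy.time,
# u from astropy.units, IERS_B and IERS_B_URL from astropy.utils.iers, and
# download_file / clear_download_cache / is_url_in_cache from astropy.utils.data.
from astropy.time import Time

# Request coverage up to a week ago; asking for a future MJD raises ValueError.
iers_b_table = get_iers_b_up_to_date(Time.now().mjd - 7.0)
print(iers_b_table[-1]["MJD"])  # last tabulated MJD, at or after the requested epoch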
def save_fits(self, savepath, link_cache='hard'):
    """
    Save a FITS file to savepath

    Parameters
    ----------
    savepath : str
        The full path to a FITS filename, e.g. "file.fits", or
        "/path/to/file.fits".
    link_cache : 'hard', 'sym', or False
        Try to create a hard or symbolic link to the astropy cached file?
        If the system is unable to create a hardlink, the file will be
        copied to the target location.
    """
    self.get_fits()
    target_key = str(self._target)

    # There has been some internal refactoring in astropy.utils.data
    # so we do this check. Update when minimum required astropy changes.
    if ASTROPY_LT_4_0:
        if not aud.is_url_in_cache(target_key):
            raise IOError("Cached file not found / does not exist.")
        target = aud.download_file(target_key, cache=True)
    else:
        target = aud.download_file(target_key, cache=True, sources=[])

    if link_cache == 'hard':
        try:
            os.link(target, savepath)
        except (IOError, OSError, AttributeError):
            shutil.copy(target, savepath)
    elif link_cache == 'sym':
        try:
            os.symlink(target, savepath)
        except AttributeError:
            raise OSError('Creating symlinks is not possible on this OS.')
    else:
        shutil.copy(target, savepath)
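# A standalone sketch (added, not from the original source) of the link-or-copy
# pattern save_fits uses above: prefer a cheap hard link to the cached file and
# fall back to copying when linking is unsupported. The helper name and arguments
# are hypothetical placeholders.
import os
import shutil

def _link_or_copy(cached_path, savepath):
    try:
        os.link(cached_path, savepath)      # hard link when the OS/filesystem allows it
    except (IOError, OSError, AttributeError):
        shutil.copy(cached_path, savepath)  # otherwise copy the bytes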
def auto_open(cls, files=None):
    """Attempt to get an up-to-date leap-second list.

    The routine will try the files in sequence until it finds one
    whose expiration date is "good enough" (see below).  If none are
    good enough, it returns the one with the most recent expiration
    date, warning if that file is expired.

    For remote files that are cached already, the cached file is tried
    first before attempting to retrieve it again.

    Parameters
    ----------
    files : list of path-like, optional
        List of files/URLs to attempt to open.  By default, uses
        ``cls._auto_open_files``.

    Returns
    -------
    leap_seconds : `~astropy.utils.iers.LeapSeconds`
        Up-to-date leap-second table.

    Notes
    -----
    Bulletin C is released about 10 days after a possible leap second
    is introduced, i.e., mid-January or mid-July.  Expiration dates are
    thus generally at least 150 days after the present.  We look for a
    file that expires more than
    180 - `~astropy.utils.iers.Conf.auto_max_age` days after the present.
    """
    offset = 180 - (30 if conf.auto_max_age is None else conf.auto_max_age)
    good_enough = cls._today() + TimeDelta(offset, format='jd')

    if files is None:
        # Basic files to go over (entries in _auto_open_files can be
        # configuration items, which we want to be sure are up to date).
        files = [getattr(conf, f, f) for f in cls._auto_open_files]

    # Remove empty entries.
    files = [f for f in files if f]

    # Our trials start with normal files and remote ones that are
    # already in cache.  The bools here indicate that the cache
    # should be used.
    trials = [(f, True) for f in files
              if not urlparse(f).netloc or is_url_in_cache(f)]
    # If we are allowed to download, we try downloading new versions
    # if none of the above worked.
    if conf.auto_download:
        trials += [(f, False) for f in files if urlparse(f).netloc]

    self = None
    err_list = []
    # Go through all entries, and return the first one that
    # is not expired, or the most up to date one.
    for f, allow_cache in trials:
        if not allow_cache:
            clear_download_cache(f)

        try:
            trial = cls.open(f, cache=True)
        except Exception as exc:
            err_list.append(exc)
            continue

        if self is None or trial.expires > self.expires:
            self = trial
            self.meta['data_url'] = str(f)
            if self.expires > good_enough:
                break

    if self is None:
        raise ValueError('none of the files could be read. The '
                         'following errors were raised:\n' + str(err_list))

    if self.expires < self._today() and conf.auto_max_age is not None:
        warn('leap-second file is expired.', IERSStaleWarning)

    return self
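# A hedged usage sketch (added, not from the original source) of how this classmethod
# is typically reached through astropy's public API; it assumes astropy >= 4.0, where
# LeapSeconds.auto_open() is available in astropy.utils.iers.
from astropy.utils import iers

leap_seconds = iers.LeapSeconds.auto_open()   # cached/remote files tried in order
print(leap_seconds.expires)                   # expiry date of the chosen table
print(leap_seconds.meta.get('data_url'))      # which file/URL was actually used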
def test_basic_getting(self):
    voc = vocabularies.get_vocabulary("datalink/core")
    assert "progenitor" in voc["terms"]
    assert data.is_url_in_cache("http://www.ivoa.net/rdf/datalink/core")
def get_references(useads=False, cache=True):
    """
    Return a dictionary of paper
    `reference <http://www.atnf.csiro.au/research/pulsar/psrcat/psrcat_ref.html>`_
    in the ATNF catalogue. The keys are the ref strings given in the ATNF
    catalogue.

    Args:
        useads (bool): boolean to set whether to use the python mod:`ads`
            module to get the NASA ADS URL for the references.
        cache (bool): use cached, or cache, the reference bundled with the
            catalogue tarball.

    Returns:
        dict: a dictionary of references.
    """
    import tempfile
    import json

    # get the tarball
    try:
        dbtarfile = download_file(ATNF_TARBALL, cache=cache)
    except IOError:
        raise IOError('Problem accessing ATNF catalogue tarball')

    try:
        # open tarball
        pulsargz = tarfile.open(dbtarfile, mode='r:gz')

        # extract the references
        reffile = pulsargz.extractfile('psrcat_tar/psrcat_ref')
    except IOError:
        raise IOError('Problem extracting the database file')

    refdic = {}
    refidx = 0
    thisref = ''
    for line in reffile.readlines():
        if isinstance(line, string_types):
            thisline = line
        else:
            thisline = line.decode()

        if thisline[0:3] == '***':
            if refidx > 0:
                # store the reference, making sure to only have single spaces
                refdic[thisname] = re.sub(r'\s+', ' ', thisref)
                thisref = ''
            refidx += 1
            thisname = thisline.split()[0].strip('***')
            thisref += thisline[thisline.find(':')+1:]
        else:
            # make sure there is a space so words don't get concatenated
            thisref += ' '
            thisref += thisline.strip()

    reffile.close()
    pulsargz.close()  # close tar file

    # if not requiring ADS references just return the current dictionary
    if not useads:
        return refdic
    else:
        try:
            import ads
            from ads.exceptions import APIResponseError
        except ImportError:
            warnings.warn('Could not import ADS module, so no ADS information '
                          'will be included', UserWarning)
            return refdic, None

    # try getting cached references
    if not cache:
        adsrefs = {}
    else:
        from astropy.utils.data import is_url_in_cache

        tmpdir = tempfile.gettempdir()  # get system "temporary" directory
        dummyurl = 'file://{}/ads_cache'.format(tmpdir)
        dummyfile = os.path.join('{}'.format(tmpdir), 'ads_cache')

        # check if cached ADS refs list exists (using dummy URL)
        if is_url_in_cache(dummyurl):
            adsfile = download_file(dummyurl, cache=True, show_progress=False)

            try:
                fp = open(adsfile, 'r')
            except IOError:
                warnings.warn('Could not load ADS URL cache for references',
                              UserWarning)
                return refdic, None

            adsrefs = json.load(fp)
            fp.close()

            return refdic, adsrefs
        else:
            adsrefs = {}

    # loop over references
    j = 0
    for reftag in refdic:
        j = j + 1

        if reftag in PROB_REFS:
            continue

        refstring = refdic[reftag]

        # try getting the year from the string and split on this (allows years
        # between 1000-2999 and followed by a lowercase letter, e.g. 2009 or
        # 2009a)
        match = re.match(r'.*([1-2][0-9]{3}[az]{1}|[1-2][0-9]{3})', refstring)
        if match is None:
            continue

        # do splitting
        spl = re.split(r'([1-2][0-9]{3}[az]{1}|[1-2][0-9]{3})', refstring)

        if len(spl) != 3:
            # more than 1 "year", so ignore!
            continue

        year = spl[1] if len(spl[1]) == 4 else spl[1][:4]

        try:
            int(year)
        except ValueError:
            # "year" is not an integer
            continue

        # get the authors (remove line breaks/extra spaces and final full-stop)
        authors = spl[0].strip().strip('.')

        # remove " Jr." from any author names (as it causes issues!)
        authors = authors.replace(' Jr.', '')

        # separate out authors
        sepauthors = authors.split('.,')[:-1]

        if len(sepauthors) == 0:
            # no authors were parsed
            continue

        # remove any "'s for umlauts in author names
        sepauthors = [a.replace(r'"', '') for a in sepauthors]

        # split any authors that are separated by an ampersand
        if '&' in sepauthors[-1] or 'and' in sepauthors[-1]:
            lastauthors = [a.strip() for a in re.split(r'& | and ',
                                                       sepauthors.pop(-1))]
            sepauthors = sepauthors + lastauthors
            for i in range(len(sepauthors)-2):
                sepauthors[i] += '.'  # re-add final full stops where needed
            sepauthors[-1] += '.'
        else:
            sepauthors = [a+'.' for a in sepauthors]  # re-add final full stops

        # get the title
        try:
            # remove preceding or trailing full stops
            title = spl[2].strip('.').split('.')[0].strip()
        except RuntimeError:
            # could not get title so ignore this entry
            continue

        # try getting ADS references
        try:
            article = ads.SearchQuery(year=year, first_author=sepauthors[0],
                                      title=title)
        except APIResponseError:
            warnings.warn('Could not get reference information, so no ADS '
                          'information will be included', UserWarning)
            continue

        try:
            adsrefs[reftag] = ADS_URL.format(list(article)[0].bibcode)
        except (IndexError, APIResponseError):
            pass

    if cache:
        # output adsrefs to cache file
        try:
            # output to dummy temporary file and then "download" to cache
            fp = open(dummyfile, 'w')
            json.dump(adsrefs, fp, indent=2)
            fp.close()
        except IOError:
            raise IOError("Could not output the ADS references to a file")

        # cache the file
        _ = download_file(dummyurl, cache=True, show_progress=False)

        # remove the temporary file
        os.remove(dummyfile)

    return refdic, adsrefs
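# A brief usage note (added, not in the original): with the defaults the function
# above returns a plain dict keyed by ATNF reference tag; with useads=True it
# returns a (refdic, adsrefs) pair, where adsrefs may be None if the ads module
# or the cached ADS lookup is unavailable.
refs = get_references()                      # {'ref_tag': 'reference string', ...}
refs, adsrefs = get_references(useads=True)  # adsrefs maps ref tags to NASA ADS URLs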
def get_references(useads=False, cache=True, updaterefcache=False,
                   bibtex=False, showfails=False):
    """
    Return a dictionary of paper
    `reference <http://www.atnf.csiro.au/research/pulsar/psrcat/psrcat_ref.html>`_
    in the ATNF catalogue. The keys are the ref strings given in the ATNF
    catalogue.

    Note:
        The way that the ATNF references are stored has changed, so if you
        downloaded the catalogue with a version of psrqpy before v1.0.8 you
        may need to run this function with ``updaterefcache=True`` to allow
        references to work. You may also want to update the ATNF catalogue
        tarball with:

        >>> import psrqpy
        >>> psrqpy.QueryATNF(checkupdate=True)

    Args:
        useads (bool): boolean to set whether to use the python mod:`ads`
            module to get the NASA ADS URL for the references.
        cache (bool): use cached, or cache, the reference bundled with the
            catalogue tarball.
        updaterefcache (bool): update the cached references.
        bibtex (bool): if using ADS return the bibtex for the reference along
            with the ADS URL.
        showfails (bool): if outputting NASA ADS references set this flag to
            True to output the reference tags of references that fail to be
            found (mainly for debugging purposes).

    Returns:
        dict: a dictionary of references.
    """
    import tempfile
    import json

    # get the tarball
    try:
        dbtarfile = download_file(ATNF_TARBALL, cache=not updaterefcache)
    except IOError:
        raise IOError("Problem accessing ATNF catalogue tarball")

    try:
        # open tarball
        pulsargz = tarfile.open(dbtarfile, mode="r:gz")

        # extract the references
        reffile = pulsargz.extractfile("psrcat_tar/psrcat_ref")
    except IOError:
        raise IOError("Problem extracting the database file")

    refdic = {
        line.split()[0]: " ".join(line.split()[2:])
        for line in reffile.read().decode("utf-8").strip().split("***")
        if len(line) > 0
    }

    reffile.close()
    pulsargz.close()  # close tar file

    # if not requiring ADS references just return the current dictionary
    if not useads:
        return refdic
    else:
        try:
            import ads
            from ads.exceptions import APIResponseError
        except ImportError:
            warnings.warn(
                "Could not import ADS module, so no ADS information "
                "will be included",
                UserWarning,
            )
            return refdic, None

    # try getting cached references
    if not cache:
        adsrefs = {}
    else:
        from astropy.utils.data import is_url_in_cache

        tmpdir = tempfile.gettempdir()  # get system "temporary" directory
        dummyurl = "file://{}/ads_cache".format(tmpdir)
        dummyfile = os.path.join("{}".format(tmpdir), "ads_cache")

        # check if cached ADS refs list exists (using dummy URL)
        if is_url_in_cache(dummyurl) and not updaterefcache:
            adsfile = download_file(dummyurl, cache=True, show_progress=False)

            try:
                fp = open(adsfile, "r")
            except IOError:
                warnings.warn("Could not load ADS URL cache for references",
                              UserWarning)
                return refdic, None

            cachedrefs = json.load(fp)
            fp.close()

            adsrefs = None
            adsbibtex = None
            failures = None
            if "urls" in cachedrefs:
                adsrefs = cachedrefs["urls"]
            if bibtex and "bibtex" in cachedrefs:
                adsbibtex = cachedrefs["bibtex"]
            if showfails and "failures" in cachedrefs:
                failures = cachedrefs["failures"]

            if bibtex:
                if failures is None:
                    return refdic, adsrefs, adsbibtex
                else:
                    return refdic, adsrefs, adsbibtex, failures
            else:
                if failures is None:
                    return refdic, adsrefs
                else:
                    return refdic, adsrefs, failures
        else:
            adsrefs = {}

    # loop over references
    j = 0
    bibcodes = {}
    failures = []
    for reftag in refdic:
        j = j + 1

        refstring = refdic[reftag]

        # check if IAU Circular or PhD thesis
        iaucirc = True if "IAU Circ" in refstring else False
        thesis = True if "PhD thesis" in refstring else False
        sepauthors = ""

        # check for arXiv identifier
        arxivid = None
"arXiv:" in refstring or "ArXiv:" in refstring: for searchterm in [ r"[Aa]rXiv:[0-9]{4}.[0-9]*", r"[Aa]rXiv:astro-ph/[0-9]{7}", ]: match = re.search(searchterm, refstring) if match is not None: arxivid = match.group().lower() break else: if iaucirc: # get circular number (value after IAU Circ. No.) spl = re.split(r"([0-9]{4})", refstring) noidx = 1 for val in spl: if "IAU Circ" in val: break noidx += 1 volume = spl[noidx] else: # do splitting on the year (allows between 1000-2999) spl = re.split(r"([1-2][0-9]{3})", refstring) if len(spl) < 2: # no authors + year, so ignore! failures.append(reftag) continue year = spl[1] if len(spl[1]) == 4 else None try: int(year) except (ValueError, TypeError): # "year" is not an integer failures.append(reftag) continue # get the authors (remove line breaks/extra spaces and final full-stop) authors = spl[0].strip().strip(".") # remove " Jr." from any author names (as it causes issues!) authors = authors.replace(" Jr.", "") # replace ampersands/and with ".," for separation authors = authors.replace(" &", ".,").replace(" and", ".,") # separate out authors sepauthors = [ auth.lstrip() for auth in authors.split(".,") if len(auth.strip()) > 0 and "et al" not in auth ] # remove any "'s for umlauts in author names sepauthors = [a.replace(r'"', "") for a in sepauthors] if len(sepauthors) == 0: # no authors were parsed failures.append(reftag) continue if not thesis and not iaucirc: volume = None page = None if len(spl) > 2: # join the remaining values and split on "," extrainfo = [ info for info in ("".join(spl[2:])).lstrip(".").split(",") if len(info.strip()) > 0 ] # get the journal volume (assumed to be second from last) try: # in case volume contains issue number in brackets perform split volume = int(extrainfo[-2].strip().split("(")[0]) except (IndexError, TypeError, ValueError): # could not get the volume pass # get the page if given (assumed to be th last value) try: testpage = re.sub("[\+\-\.]", "", extrainfo[-1].strip().split("-")[0]) if not testpage.startswith( "eaao"): # Science Advances page string if (testpage[0].upper() in ["L", "A", "E"] or testpage[0:4] == ""): # e.g. for ApJL, A&A, PASA _ = int(testpage[1:]) elif testpage[-1].upper( ) == "P": # e.g., for early MNRAS _ = int(testpage[:-1]) else: _ = int(testpage) page = testpage except (IndexError, TypeError, ValueError): # could not get the page pass if volume is None or page is None: failures.append(reftag) continue # generate the query string if arxivid is None: if not thesis: if iaucirc: myquery = 'bibstem:"IAUC" volume:"{}"'.format(volume) else: # default query without authors myquery = "year:{} AND volume:{} AND page:{}".format( year, volume, page) # add author if given if len(sepauthors) > 0: # check if authors have spaces in last names (a few cases due to formating of some accented names), # if so try next author... 
                    for k, thisauthor in enumerate(sepauthors):
                        if len(thisauthor.split(",")[0].split()) == 1:
                            myquery += ' AND author:"{}{}"'.format(
                                "^" if k == 0 else "", thisauthor)
                            break
            else:
                myquery = 'year: {} AND author:"^{}" AND bibstem:"PhDT"'.format(
                    year, sepauthors[0])
        else:
            myquery = arxivid

        try:
            article = ads.SearchQuery(q=myquery)
        except APIResponseError:
            failures.append(reftag)
            warnings.warn(
                "Could not get reference information, so no ADS "
                "information for {} will be included".format(reftag),
                UserWarning,
            )
            continue

        for paper in article:
            bibcodes[reftag] = paper.bibcode
            adsrefs[reftag] = ADS_URL.format(bibcodes[reftag])

        # check if paper bibcode was found
        if reftag not in bibcodes:
            failures.append(reftag)

    if bibtex:
        # use ExportQuery to get bibtex
        expquery = ads.ExportQuery(
            list(bibcodes.values())).execute().split("\n\n")

        adsbibtex = {}
        for reftag in bibcodes:
            for equery in expquery:
                if bibcodes[reftag] in equery:
                    adsbibtex[reftag] = equery
                    break

    if cache:
        # output adsrefs to cache file
        try:
            # output to dummy temporary file and then "download" to cache
            fp = open(dummyfile, "w")
            cachedic = {}
            cachedic["urls"] = adsrefs
            if bibtex:
                cachedic["bibtex"] = adsbibtex
            if showfails:
                cachedic["failures"] = failures
            json.dump(cachedic, fp, indent=2)
            fp.close()
        except IOError:
            raise IOError("Could not output the ADS references to a file")

        # cache the file
        _ = download_file(dummyurl, cache=True, show_progress=False)

        # remove the temporary file
        os.remove(dummyfile)

    if bibtex:
        if showfails:
            return refdic, adsrefs, adsbibtex, failures
        else:
            return refdic, adsrefs, adsbibtex
    else:
        if showfails:
            return refdic, adsrefs, failures
        else:
            return refdic, adsrefs
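# A brief usage note (added, not in the original): the return shape of the function
# above depends on its flags, so callers need to unpack accordingly.
refs = get_references()                                     # dict of reference strings
refs, urls = get_references(useads=True)                    # plus NASA ADS URLs
refs, urls, bib = get_references(useads=True, bibtex=True)  # plus bibtex entries
refs, urls, bib, failed = get_references(
    useads=True, bibtex=True, showfails=True)               # plus unresolved ref tags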