def test_is_url_in_cache():
    from astropy.utils.data import download_file, is_url_in_cache

    assert not is_url_in_cache('http://astropy.org/nonexistentfile')

    download_file(TESTURL, cache=True, show_progress=False)
    assert is_url_in_cache(TESTURL)
def get_iers_up_to_date(mjd=Time.now().mjd - 45.0):
    """
    Update the IERS B table to include MJD (defaults to 45 days ago) and open
    IERS_Auto.
    """
    # First clear the IERS_Auto table
    IERS_Auto.iers_table = None

    if mjd > Time.now().mjd:
        raise ValueError("IERS B data requested for future MJD {}".format(mjd))

    might_be_old = is_url_in_cache(IERS_B_URL)
    iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if might_be_old and iers_b[-1]["MJD"].to_value(u.d) < mjd:
        # Try wiping the download and re-downloading
        log.info("IERS B Table appears to be old. Attempting to re-download.")
        clear_download_cache(IERS_B_URL)
        iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if iers_b[-1]["MJD"].to_value(u.d) < mjd:
        log.warning("IERS B data not yet available for MJD {}".format(mjd))

    # Now open IERS_Auto with no argument, so it should use the IERS_B that
    # we just made sure was up to date
    iers_auto = IERS_Auto.open()

    if astropy.version.major >= 4:
        # Tell astropy to use this table for all future transformations
        earth_orientation_table.set(iers_auto)
def get_iers_b_up_to_date(mjd):
    """Update the IERS B table to include MJD if necessary."""
    if Time.now().mjd <= mjd:
        raise ValueError("IERS B data requested for future MJD {}".format(mjd))

    might_be_old = is_url_in_cache(IERS_B_URL)
    iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if might_be_old and iers_b[-1]["MJD"].to_value(u.d) < mjd:
        # Try wiping the download and re-downloading
        clear_download_cache(IERS_B_URL)
        iers_b = IERS_B.open(download_file(IERS_B_URL, cache=True))

    if iers_b[-1]["MJD"].to_value(u.d) < mjd:
        raise ValueError(
            "IERS B data not yet available for MJD {}".format(mjd))

    return iers_b
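# A minimal usage sketch (added here, not from the original source), assuming the
# names the function above relies on are already in scope: Time from astropy.time,
# u from astropy.units, IERS_B and IERS_B_URL from astropy.utils.iers, and
# download_file / clear_download_cache / is_url_in_cache from astropy.utils.data.
from astropy.time import Time

# Request coverage up to a week ago; asking for a future MJD raises ValueError.
iers_b_table = get_iers_b_up_to_date(Time.now().mjd - 7.0)
print(iers_b_table[-1]["MJD"])  # last tabulated MJD, at or after the requested epoch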
def save_fits(self, savepath, link_cache='hard'):
    """
    Save a FITS file to savepath

    Parameters
    ----------
    savepath : str
        The full path to a FITS filename, e.g. "file.fits", or
        "/path/to/file.fits".
    link_cache : 'hard', 'sym', or False
        Try to create a hard or symbolic link to the astropy cached file?
        If the system is unable to create a hardlink, the file will be
        copied to the target location.
    """
    self.get_fits()
    target_key = str(self._target)

    # There has been some internal refactoring in astropy.utils.data
    # so we do this check. Update when minimum required astropy changes.
    if ASTROPY_LT_4_0:
        if not aud.is_url_in_cache(target_key):
            raise IOError("Cached file not found / does not exist.")
        target = aud.download_file(target_key, cache=True)
    else:
        target = aud.download_file(target_key, cache=True, sources=[])

    if link_cache == 'hard':
        try:
            os.link(target, savepath)
        except (IOError, OSError, AttributeError):
            shutil.copy(target, savepath)
    elif link_cache == 'sym':
        try:
            os.symlink(target, savepath)
        except AttributeError:
            raise OSError('Creating symlinks is not possible on this OS.')
    else:
        shutil.copy(target, savepath)
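# A standalone sketch (added, not from the original source) of the link-or-copy
# pattern save_fits uses above: prefer a cheap hard link to the cached file and
# fall back to copying when linking is unsupported. The helper name and arguments
# are hypothetical placeholders.
import os
import shutil

def _link_or_copy(cached_path, savepath):
    try:
        os.link(cached_path, savepath)      # hard link when the OS/filesystem allows it
    except (IOError, OSError, AttributeError):
        shutil.copy(cached_path, savepath)  # otherwise copy the bytes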
def auto_open(cls, files=None):
    """Attempt to get an up-to-date leap-second list.

    The routine will try the files in sequence until it finds one
    whose expiration date is "good enough" (see below).  If none are
    good enough, it returns the one with the most recent expiration
    date, warning if that file is expired.

    For remote files that are cached already, the cached file is tried
    first before attempting to retrieve it again.

    Parameters
    ----------
    files : list of path-like, optional
        List of files/URLs to attempt to open.  By default, uses
        ``cls._auto_open_files``.

    Returns
    -------
    leap_seconds : `~astropy.utils.iers.LeapSeconds`
        Up-to-date leap-second table.

    Notes
    -----
    Bulletin C is released about 10 days after a possible leap second
    is introduced, i.e., mid-January or mid-July.  Expiration dates are
    thus generally at least 150 days after the present.  We look for a
    file that expires more than
    180 - `~astropy.utils.iers.Conf.auto_max_age` days after the present.
    """
    offset = 180 - (30 if conf.auto_max_age is None else conf.auto_max_age)
    good_enough = cls._today() + TimeDelta(offset, format='jd')

    if files is None:
        # Basic files to go over (entries in _auto_open_files can be
        # configuration items, which we want to be sure are up to date).
        files = [getattr(conf, f, f) for f in cls._auto_open_files]

    # Remove empty entries.
    files = [f for f in files if f]

    # Our trials start with normal files and remote ones that are
    # already in cache.  The bools here indicate that the cache
    # should be used.
    trials = [(f, True) for f in files
              if not urlparse(f).netloc or is_url_in_cache(f)]
    # If we are allowed to download, we try downloading new versions
    # if none of the above worked.
    if conf.auto_download:
        trials += [(f, False) for f in files if urlparse(f).netloc]

    self = None
    err_list = []
    # Go through all entries, and return the first one that
    # is not expired, or the most up to date one.
    for f, allow_cache in trials:
        if not allow_cache:
            clear_download_cache(f)

        try:
            trial = cls.open(f, cache=True)
        except Exception as exc:
            err_list.append(exc)
            continue

        if self is None or trial.expires > self.expires:
            self = trial
            self.meta['data_url'] = str(f)
            if self.expires > good_enough:
                break

    if self is None:
        raise ValueError('none of the files could be read. The '
                         'following errors were raised:\n' + str(err_list))

    if self.expires < self._today() and conf.auto_max_age is not None:
        warn('leap-second file is expired.', IERSStaleWarning)

    return self
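# A hedged usage sketch (added, not from the original source) of how this classmethod
# is typically reached through astropy's public API; it assumes astropy >= 4.0, where
# LeapSeconds.auto_open() is available in astropy.utils.iers.
from astropy.utils import iers

leap_seconds = iers.LeapSeconds.auto_open()   # cached/remote files tried in order
print(leap_seconds.expires)                   # expiry date of the chosen table
print(leap_seconds.meta.get('data_url'))      # which file/URL was actually used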
def test_basic_getting(self):
    voc = vocabularies.get_vocabulary("datalink/core")
    assert "progenitor" in voc["terms"]
    assert data.is_url_in_cache("http://www.ivoa.net/rdf/datalink/core")
def get_references(useads=False, cache=True):
    """
    Return a dictionary of paper
    `reference <http://www.atnf.csiro.au/research/pulsar/psrcat/psrcat_ref.html>`_
    in the ATNF catalogue. The keys are the ref strings given in the ATNF
    catalogue.

    Args:
        useads (bool): boolean to set whether to use the python mod:`ads`
            module to get the NASA ADS URL for the references.
        cache (bool): use cached, or cache, the reference bundled with the
            catalogue tarball.

    Returns:
        dict: a dictionary of references.
    """
    import tempfile
    import json

    # get the tarball
    try:
        dbtarfile = download_file(ATNF_TARBALL, cache=cache)
    except IOError:
        raise IOError('Problem accessing ATNF catalogue tarball')

    try:
        # open tarball
        pulsargz = tarfile.open(dbtarfile, mode='r:gz')

        # extract the references
        reffile = pulsargz.extractfile('psrcat_tar/psrcat_ref')
    except IOError:
        raise IOError('Problem extracting the database file')

    refdic = {}
    refidx = 0
    thisref = ''
    for line in reffile.readlines():
        if isinstance(line, string_types):
            thisline = line
        else:
            thisline = line.decode()

        if thisline[0:3] == '***':
            if refidx > 0:
                # store the reference, making sure to only have single spaces
                refdic[thisname] = re.sub(r'\s+', ' ', thisref)
                thisref = ''
            refidx += 1
            thisname = thisline.split()[0].strip('***')
            thisref += thisline[thisline.find(':')+1:]
        else:
            # make sure there is a space so words don't get concatenated
            thisref += ' '
            thisref += thisline.strip()

    reffile.close()
    pulsargz.close()  # close tar file

    # if not requiring ADS references just return the current dictionary
    if not useads:
        return refdic
    else:
        try:
            import ads
            from ads.exceptions import APIResponseError
        except ImportError:
            warnings.warn('Could not import ADS module, so no ADS information '
                          'will be included', UserWarning)
            return refdic, None

    # try getting cached references
    if not cache:
        adsrefs = {}
    else:
        from astropy.utils.data import is_url_in_cache

        tmpdir = tempfile.gettempdir()  # get system "temporary" directory
        dummyurl = 'file://{}/ads_cache'.format(tmpdir)
        dummyfile = os.path.join('{}'.format(tmpdir), 'ads_cache')

        # check if cached ADS refs list exists (using dummy URL)
        if is_url_in_cache(dummyurl):
            adsfile = download_file(dummyurl, cache=True, show_progress=False)

            try:
                fp = open(adsfile, 'r')
            except IOError:
                warnings.warn('Could not load ADS URL cache for references',
                              UserWarning)
                return refdic, None

            adsrefs = json.load(fp)
            fp.close()

            return refdic, adsrefs
        else:
            adsrefs = {}

    # loop over references
    j = 0
    for reftag in refdic:
        j = j + 1

        if reftag in PROB_REFS:
            continue

        refstring = refdic[reftag]

        # try getting the year from the string and split on this (allows years
        # between 1000-2999 and followed by a lowercase letter, e.g. 2009 or
        # 2009a)
        match = re.match(r'.*([1-2][0-9]{3}[az]{1}|[1-2][0-9]{3})', refstring)
        if match is None:
            continue

        # do splitting
        spl = re.split(r'([1-2][0-9]{3}[az]{1}|[1-2][0-9]{3})', refstring)

        if len(spl) != 3:
            # more than 1 "year", so ignore!
            continue

        year = spl[1] if len(spl[1]) == 4 else spl[1][:4]

        try:
            int(year)
        except ValueError:
            # "year" is not an integer
            continue

        # get the authors (remove line breaks/extra spaces and final full-stop)
        authors = spl[0].strip().strip('.')

        # remove " Jr." from any author names (as it causes issues!)
        authors = authors.replace(' Jr.', '')

        # separate out authors
        sepauthors = authors.split('.,')[:-1]

        if len(sepauthors) == 0:
            # no authors were parsed
            continue

        # remove any "'s for umlauts in author names
        sepauthors = [a.replace(r'"', '') for a in sepauthors]

        # split any authors that are separated by an ampersand
        if '&' in sepauthors[-1] or 'and' in sepauthors[-1]:
            lastauthors = [a.strip() for a in re.split(r'& | and ',
                                                       sepauthors.pop(-1))]
            sepauthors = sepauthors + lastauthors
            for i in range(len(sepauthors)-2):
                sepauthors[i] += '.'  # re-add final full stops where needed
            sepauthors[-1] += '.'
        else:
            sepauthors = [a+'.' for a in sepauthors]  # re-add final full stops

        # get the title
        try:
            # remove preceding or trailing full stops
            title = spl[2].strip('.').split('.')[0].strip()
        except RuntimeError:
            # could not get title so ignore this entry
            continue

        # try getting ADS references
        try:
            article = ads.SearchQuery(year=year, first_author=sepauthors[0],
                                      title=title)
        except APIResponseError:
            warnings.warn('Could not get reference information, so no ADS '
                          'information will be included', UserWarning)
            continue

        try:
            adsrefs[reftag] = ADS_URL.format(list(article)[0].bibcode)
        except (IndexError, APIResponseError):
            pass

    if cache:
        # output adsrefs to cache file
        try:
            # output to dummy temporary file and then "download" to cache
            fp = open(dummyfile, 'w')
            json.dump(adsrefs, fp, indent=2)
            fp.close()
        except IOError:
            raise IOError("Could not output the ADS references to a file")

        # cache the file
        _ = download_file(dummyurl, cache=True, show_progress=False)

        # remove the temporary file
        os.remove(dummyfile)

    return refdic, adsrefs
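# A brief usage note (added, not in the original): with the defaults the function
# above returns a plain dict keyed by ATNF reference tag; with useads=True it
# returns a (refdic, adsrefs) pair, where adsrefs may be None if the ads module
# or the cached ADS lookup is unavailable.
refs = get_references()                      # {'ref_tag': 'reference string', ...}
refs, adsrefs = get_references(useads=True)  # adsrefs maps ref tags to NASA ADS URLs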
def get_references(useads=False, cache=True, updaterefcache=False,
                   bibtex=False, showfails=False):
    """
    Return a dictionary of paper
    `reference <http://www.atnf.csiro.au/research/pulsar/psrcat/psrcat_ref.html>`_
    in the ATNF catalogue. The keys are the ref strings given in the ATNF
    catalogue.

    Note:
        The way that the ATNF references are stored has changed, so if you
        downloaded the catalogue with a version of psrqpy before v1.0.8 you
        may need to run this function with ``updaterefcache=True`` to allow
        references to work. You may also want to update the ATNF catalogue
        tarball with:

        >>> import psrqpy
        >>> psrqpy.QueryATNF(checkupdate=True)

    Args:
        useads (bool): boolean to set whether to use the python mod:`ads`
            module to get the NASA ADS URL for the references.
        cache (bool): use cached, or cache, the reference bundled with the
            catalogue tarball.
        updaterefcache (bool): update the cached references.
        bibtex (bool): if using ADS return the bibtex for the reference along
            with the ADS URL.
        showfails (bool): if outputting NASA ADS references set this flag to
            True to output the reference tags of references that fail to be
            found (mainly for debugging purposes).

    Returns:
        dict: a dictionary of references.
    """
    import tempfile
    import json

    # get the tarball
    try:
        dbtarfile = download_file(ATNF_TARBALL, cache=not updaterefcache)
    except IOError:
        raise IOError("Problem accessing ATNF catalogue tarball")

    try:
        # open tarball
        pulsargz = tarfile.open(dbtarfile, mode="r:gz")

        # extract the references
        reffile = pulsargz.extractfile("psrcat_tar/psrcat_ref")
    except IOError:
        raise IOError("Problem extracting the database file")

    refdic = {
        line.split()[0]: " ".join(line.split()[2:])
        for line in reffile.read().decode("utf-8").strip().split("***")
        if len(line) > 0
    }

    reffile.close()
    pulsargz.close()  # close tar file

    # if not requiring ADS references just return the current dictionary
    if not useads:
        return refdic
    else:
        try:
            import ads
            from ads.exceptions import APIResponseError
        except ImportError:
            warnings.warn(
                "Could not import ADS module, so no ADS information "
                "will be included",
                UserWarning,
            )
            return refdic, None

    # try getting cached references
    if not cache:
        adsrefs = {}
    else:
        from astropy.utils.data import is_url_in_cache

        tmpdir = tempfile.gettempdir()  # get system "temporary" directory
        dummyurl = "file://{}/ads_cache".format(tmpdir)
        dummyfile = os.path.join("{}".format(tmpdir), "ads_cache")

        # check if cached ADS refs list exists (using dummy URL)
        if is_url_in_cache(dummyurl) and not updaterefcache:
            adsfile = download_file(dummyurl, cache=True, show_progress=False)

            try:
                fp = open(adsfile, "r")
            except IOError:
                warnings.warn("Could not load ADS URL cache for references",
                              UserWarning)
                return refdic, None

            cachedrefs = json.load(fp)
            fp.close()

            adsrefs = None
            adsbibtex = None
            failures = None
            if "urls" in cachedrefs:
                adsrefs = cachedrefs["urls"]
            if bibtex and "bibtex" in cachedrefs:
                adsbibtex = cachedrefs["bibtex"]
            if showfails and "failures" in cachedrefs:
                failures = cachedrefs["failures"]

            if bibtex:
                if failures is None:
                    return refdic, adsrefs, adsbibtex
                else:
                    return refdic, adsrefs, adsbibtex, failures
            else:
                if failures is None:
                    return refdic, adsrefs
                else:
                    return refdic, adsrefs, failures
        else:
            adsrefs = {}

    # loop over references
    j = 0
    bibcodes = {}
    failures = []
    for reftag in refdic:
        j = j + 1

        refstring = refdic[reftag]

        # check if IAU Circular or PhD thesis
        iaucirc = True if "IAU Circ" in refstring else False
        thesis = True if "PhD thesis" in refstring else False
        sepauthors = ""

        # check for arXiv identifier
        arxivid = None
"arXiv:" in refstring or "ArXiv:" in refstring: for searchterm in [ r"[Aa]rXiv:[0-9]{4}.[0-9]*", r"[Aa]rXiv:astro-ph/[0-9]{7}", ]: match = re.search(searchterm, refstring) if match is not None: arxivid = match.group().lower() break else: if iaucirc: # get circular number (value after IAU Circ. No.) spl = re.split(r"([0-9]{4})", refstring) noidx = 1 for val in spl: if "IAU Circ" in val: break noidx += 1 volume = spl[noidx] else: # do splitting on the year (allows between 1000-2999) spl = re.split(r"([1-2][0-9]{3})", refstring) if len(spl) < 2: # no authors + year, so ignore! failures.append(reftag) continue year = spl[1] if len(spl[1]) == 4 else None try: int(year) except (ValueError, TypeError): # "year" is not an integer failures.append(reftag) continue # get the authors (remove line breaks/extra spaces and final full-stop) authors = spl[0].strip().strip(".") # remove " Jr." from any author names (as it causes issues!) authors = authors.replace(" Jr.", "") # replace ampersands/and with ".," for separation authors = authors.replace(" &", ".,").replace(" and", ".,") # separate out authors sepauthors = [ auth.lstrip() for auth in authors.split(".,") if len(auth.strip()) > 0 and "et al" not in auth ] # remove any "'s for umlauts in author names sepauthors = [a.replace(r'"', "") for a in sepauthors] if len(sepauthors) == 0: # no authors were parsed failures.append(reftag) continue if not thesis and not iaucirc: volume = None page = None if len(spl) > 2: # join the remaining values and split on "," extrainfo = [ info for info in ("".join(spl[2:])).lstrip(".").split(",") if len(info.strip()) > 0 ] # get the journal volume (assumed to be second from last) try: # in case volume contains issue number in brackets perform split volume = int(extrainfo[-2].strip().split("(")[0]) except (IndexError, TypeError, ValueError): # could not get the volume pass # get the page if given (assumed to be th last value) try: testpage = re.sub("[\+\-\.]", "", extrainfo[-1].strip().split("-")[0]) if not testpage.startswith( "eaao"): # Science Advances page string if (testpage[0].upper() in ["L", "A", "E"] or testpage[0:4] == ""): # e.g. for ApJL, A&A, PASA _ = int(testpage[1:]) elif testpage[-1].upper( ) == "P": # e.g., for early MNRAS _ = int(testpage[:-1]) else: _ = int(testpage) page = testpage except (IndexError, TypeError, ValueError): # could not get the page pass if volume is None or page is None: failures.append(reftag) continue # generate the query string if arxivid is None: if not thesis: if iaucirc: myquery = 'bibstem:"IAUC" volume:"{}"'.format(volume) else: # default query without authors myquery = "year:{} AND volume:{} AND page:{}".format( year, volume, page) # add author if given if len(sepauthors) > 0: # check if authors have spaces in last names (a few cases due to formating of some accented names), # if so try next author... 
                    for k, thisauthor in enumerate(sepauthors):
                        if len(thisauthor.split(",")[0].split()) == 1:
                            myquery += ' AND author:"{}{}"'.format(
                                "^" if k == 0 else "", thisauthor)
                            break
            else:
                myquery = 'year: {} AND author:"^{}" AND bibstem:"PhDT"'.format(
                    year, sepauthors[0])
        else:
            myquery = arxivid

        try:
            article = ads.SearchQuery(q=myquery)
        except APIResponseError:
            failures.append(reftag)
            warnings.warn(
                "Could not get reference information, so no ADS "
                "information for {} will be included".format(reftag),
                UserWarning,
            )
            continue

        for paper in article:
            bibcodes[reftag] = paper.bibcode
            adsrefs[reftag] = ADS_URL.format(bibcodes[reftag])

        # check if paper bibcode was found
        if reftag not in bibcodes:
            failures.append(reftag)

    if bibtex:
        # use ExportQuery to get bibtex
        expquery = ads.ExportQuery(
            list(bibcodes.values())).execute().split("\n\n")

        adsbibtex = {}
        for reftag in bibcodes:
            for equery in expquery:
                if bibcodes[reftag] in equery:
                    adsbibtex[reftag] = equery
                    break

    if cache:
        # output adsrefs to cache file
        try:
            # output to dummy temporary file and then "download" to cache
            fp = open(dummyfile, "w")
            cachedic = {}
            cachedic["urls"] = adsrefs
            if bibtex:
                cachedic["bibtex"] = adsbibtex
            if showfails:
                cachedic["failures"] = failures
            json.dump(cachedic, fp, indent=2)
            fp.close()
        except IOError:
            raise IOError("Could not output the ADS references to a file")

        # cache the file
        _ = download_file(dummyurl, cache=True, show_progress=False)

        # remove the temporary file
        os.remove(dummyfile)

    if bibtex:
        if showfails:
            return refdic, adsrefs, adsbibtex, failures
        else:
            return refdic, adsrefs, adsbibtex
    else:
        if showfails:
            return refdic, adsrefs, failures
        else:
            return refdic, adsrefs
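# A brief usage note (added, not in the original): the return shape of the function
# above depends on its flags, so callers need to unpack accordingly.
refs = get_references()                                     # dict of reference strings
refs, urls = get_references(useads=True)                    # plus NASA ADS URLs
refs, urls, bib = get_references(useads=True, bibtex=True)  # plus bibtex entries
refs, urls, bib, failed = get_references(
    useads=True, bibtex=True, showfails=True)               # plus unresolved ref tags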