Exemple #1
0
    def __init__(self, kwargs):
        """Pick the plotting backend and instantiate it with the merged options.

        The backend name comes from the ``"<kind>-plotting-backend"`` setting
        unless ``kwargs`` contains a ``"backend"`` entry, which takes
        precedence and is removed from ``kwargs``. Options are layered in
        increasing priority: the ``"plotting-options"`` setting, ``OPTIONS``,
        then whatever is left in ``kwargs``.
        """
        configured = SETTINGS.get(f"{self.kind}-plotting-backend", None)
        chosen = kwargs.pop("backend", configured)

        # Later layers override earlier ones.
        merged = {}
        for layer in (SETTINGS.get("plotting-options", {}), OPTIONS, kwargs):
            merged.update(layer)

        self.backend = DRIVERS[chosen](Options(merged))
Exemple #2
0
    def connection(self):
        """Return the SQLite connection to the cache database, opening it on first use.

        On the first call the cache directory is created if needed, the
        database file ``CACHE_DB`` inside it is opened, rows are configured to
        be returned as ``sqlite3.Row`` and the ``cache`` table is created if it
        does not exist yet. The connection is then kept on ``self._connection``
        and returned as-is by later calls.

        If the schema set-up fails, the connection is closed and nothing is
        cached, so that a later call starts again from scratch.
        """
        if self._connection is not None:
            return self._connection

        cache_dir = SETTINGS.get("cache-directory")
        # exist_ok=True already tolerates an existing directory, so no
        # separate existence check is needed.
        os.makedirs(cache_dir, exist_ok=True)
        cache_db = os.path.join(cache_dir, CACHE_DB)
        LOG.debug("Cache database is %s", cache_db)

        connection = sqlite3.connect(cache_db)
        try:
            # So we can use rows as dictionaries
            connection.row_factory = sqlite3.Row

            # If you change the schema, change VERSION above
            connection.execute(
                """
                CREATE TABLE IF NOT EXISTS cache (
                        path          TEXT PRIMARY KEY,
                        owner         TEXT NOT NULL,
                        args          TEXT NOT NULL,
                        creation_date TEXT NOT NULL,
                        flags         INTEGER DEFAULT 0,
                        owner_data    TEXT,
                        last_access   TEXT NOT NULL,
                        type          TEXT,
                        parent        TEXT,
                        replaced      TEXT,
                        extra         TEXT,
                        expires       INTEGER,
                        accesses      INTEGER,
                        size          INTEGER);"""
            )
        except Exception:
            # Never cache (or leak) a connection whose set-up did not complete.
            connection.close()
            raise

        self._connection = connection
        return self._connection
Exemple #3
0
    def prepare(self, url):
        """Open an FTP connection for ``url`` and return the remote file size.

        Connects to the server named in ``url``, logs in with the credentials
        embedded in the URL (anonymously when there are none), changes to the
        directory of the target file and enables passive mode. The open
        connection and the file name are stored on ``self.ftp`` and
        ``self.filename``.

        Parameters
        ----------
        url : str
            A URL of the form ``ftp://[user[:password]@]host[:port]/path``.

        Returns
        -------
        The result of ``FTP.size()`` for the target file.

        Raises
        ------
        AssertionError
            If the URL scheme is not ``ftp``.
        """
        o = urlparse(url)
        # Kept as an assertion so that callers see the same exception type.
        assert o.scheme == "ftp"

        ftp = FTP(timeout=SETTINGS.get("url-download-timeout"))
        try:
            # Rely on the parsed components instead of splitting netloc by
            # hand: this copes with an explicit port, a user name without a
            # password, and passwords containing ":" or "@".
            # Port 0 tells ftplib to keep its default port.
            ftp.connect(o.hostname or "", o.port or 0)

            if o.username:
                ftp.login(o.username, o.password or "")
            else:
                ftp.login()

            ftp.cwd(os.path.dirname(o.path))
            ftp.set_pasv(True)
        except Exception:
            # Do not leak the control connection when the set-up fails.
            ftp.close()
            raise

        self.filename = os.path.basename(o.path)
        self.ftp = ftp
        return ftp.size(self.filename)
Exemple #4
0
    def out_of_date(self, url, path, cache_data):
        """Tell whether the cached copy at ``path`` must be downloaded again.

        Returns ``False`` immediately when the ``check-out-of-date-urls``
        setting is ``False``, or when the downloader does not report the
        cached file as out of date. Otherwise returns ``True`` only if
        re-downloading is enabled, through the ``download-out-of-date-urls``
        setting or ``self.update_if_out_of_date``; if it is not, a hint is
        logged and ``False`` is returned.
        """
        if SETTINGS.get("check-out-of-date-urls") is False:
            return False

        if not self.downloader.out_of_date(path, cache_data):
            return False

        refresh = (SETTINGS.get("download-out-of-date-urls")
                   or self.update_if_out_of_date)
        if not refresh:
            LOG.warning(
                "To enable automatic downloading of updated URLs set the 'download-out-of-date-urls'"
                " setting to True", )
            return False

        LOG.warning(
            "Invalidating cache version and re-downloading %s",
            self.url,
        )
        return True
Exemple #5
0
    def _check_cache_size(self):
        """Evict cache entries when the cache exceeds its configured limits.

        Two independent limits are checked; either is skipped when its
        setting is ``None``:

        * ``maximum-cache-size``: when ``self._cache_size()`` exceeds it, the
          excess is passed to ``self._decache()``.
        * ``maximum-cache-disk-usage``: when the usage percentage reported by
          ``psutil.disk_usage()`` for the cache directory exceeds it, the
          excess share of the disk's total size is passed to
          ``self._decache()``.

        ``self._housekeeping()`` is run before each eviction.
        """
        # Check absolute limit
        size = self._cache_size()
        maximum = SETTINGS.get("maximum-cache-size")
        if maximum is not None and size > maximum:
            self._housekeeping()
            self._decache(size - maximum)

        # Check relative limit
        usage = SETTINGS.get("maximum-cache-disk-usage")
        if usage is None:
            # No relative limit configured: comparing against None would raise.
            return

        cache_directory = SETTINGS.get("cache-directory")
        df = psutil.disk_usage(cache_directory)
        if df.percent > usage:
            LOG.debug("Cache disk usage %s, limit %s", df.percent, usage)
            self._housekeeping()
            # Convert the excess percentage points into a share of df.total.
            delta = (df.percent - usage) * df.total * 0.01
            self._decache(delta)
Exemple #6
0
 def __repr__(self):
     """Return ``ClassName(path,ReaderClassName)`` for debugging.

     The cache directory prefix in ``path`` is abbreviated to ``CACHE:``.
     The method is defensive: a missing ``path`` is shown as ``None``, a
     missing ``_reader`` is shown as the ``AttributeError`` message, and any
     other failure to inspect the reader is shown as ``Unknown``.
     """
     cache_dir = SETTINGS.get("cache-directory")
     path = getattr(self, "path", None)
     # Only abbreviate when there is a usable, non-empty directory string:
     # str.replace() rejects None and would garble the path on "".
     if isinstance(path, str) and isinstance(cache_dir, str) and cache_dir:
         path = path.replace(cache_dir, "CACHE:")
     try:
         reader_class_name = str(self._reader.__class__.__name__)
     except AttributeError as e:
         reader_class_name = str(e)
     except Exception:
         # Deliberately broad, but no longer swallows KeyboardInterrupt or
         # SystemExit as a bare ``except:`` would.
         reader_class_name = "Unknown"
     return f"{self.__class__.__name__}({path},{reader_class_name})"
Exemple #7
0
    def _housekeeping(self, clean=False):
        """Register files of the cache directory that the database does not know.

        Every entry of the cache directory, except the database file itself,
        that has no row in the ``cache`` table is registered through
        ``self._register_cache_file()`` with the owner ``"orphans"``. Entries
        modified less than two minutes ago are left alone. Once the directory
        has been scanned, ``self._update_cache(clean=clean)`` is called.

        Parameters
        ----------
        clean : bool, optional
            Forwarded unchanged to ``self._update_cache()``.
        """
        top = SETTINGS.get("cache-directory")
        # NOTE(review): presumably self.connection is the sqlite3 connection,
        # so this block runs as one transaction -- confirm.
        with self.connection as db:
            for name in os.listdir(top):
                if name == CACHE_DB:
                    continue

                full = os.path.join(top, name)
                count = db.execute("SELECT count(*) FROM cache WHERE path=?",
                                   (full, )).fetchone()[0]

                # Already registered: nothing to do for this entry.
                if count > 0:
                    continue

                # Look for a registered top-level entry (parent IS NULL) whose
                # path is a prefix of this one. Candidates are pre-selected
                # with LIKE on everything before the first "." of the full path.
                parent = None
                start = full.split(".")[0] + "%"
                for n in db.execute(
                        "SELECT path FROM cache WHERE parent IS NULL and path LIKE ?",
                    (start, ),
                ).fetchall():
                    if full.startswith(n["path"]):
                        parent = n["path"]
                        break

                # Skip recently modified entries. If stat() fails, the entry
                # is still registered below.
                try:
                    s = os.stat(full)
                    if time.time() - s.st_mtime < 120:  # Two minutes
                        continue
                except OSError:
                    pass

                if parent is None:
                    LOG.warning(f"CliMetLab cache: orphan found: {full}")
                else:
                    LOG.debug(
                        f"CliMetLab cache: orphan found: {full} with parent {parent}"
                    )

                self._register_cache_file(
                    full,
                    "orphans",
                    None,
                    parent,
                )
        self._update_cache(clean=clean)
Exemple #8
0
    def prepare(self, url):
        """Start a streaming GET on ``url`` and return the advertised size.

        The size is taken from the ``content-length`` entry of
        ``self.headers(url)``; it is ``None`` when that entry is missing or
        cannot be converted to an integer (the failure is logged). The open
        streaming response is stored on ``self.request``.

        Parameters
        ----------
        url : str
            The URL to download.

        Returns
        -------
        int or None
            The expected size, if known.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status.
        """
        size = None
        headers = self.headers(url)
        if "content-length" in headers:
            try:
                size = int(headers["content-length"])
            except Exception:
                # Best effort: an unusable header only means the size is unknown.
                LOG.exception("content-length %s", url)

        r = requests.get(
            url,
            stream=True,
            verify=self.owner.verify,
            timeout=SETTINGS.get("url-download-timeout"),
            headers=self.owner.http_headers,
        )
        try:
            r.raise_for_status()
        except Exception:
            # With stream=True the connection stays checked out until the body
            # is consumed or the response is closed; release it before
            # propagating the error.
            r.close()
            raise

        self.request = r

        return size
Exemple #9
0
    def out_of_date(self, url, path, cache_data):
        """Tell whether the cached copy of ``url`` should be downloaded again.

        Parameters
        ----------
        url : str
            The URL the cached file was downloaded from.
        path : str
            Path of the cached file (not used by this implementation).
        cache_data : dict or None
            Data recorded with the cached file; the ``"expires"`` and
            ``"etag"`` entries are consulted. ``None`` means nothing is known.

        Returns
        -------
        bool
            ``True`` only when the remote ``etag`` differs from the recorded
            one and re-downloading is enabled, through the
            ``download-updated-urls`` setting or
            ``self.owner.update_if_out_of_date``. ``False`` otherwise,
            including when the recorded expiry date is still in the future.
        """
        if cache_data is None:
            return False

        # TODO: check 'cache-control' to see if we should check the etag

        if "expires" in cache_data:
            try:
                expires = parse_date(cache_data["expires"])
                # Timezone-aware "now"; datetime.utcnow() is deprecated.
                now = datetime.datetime.now(datetime.timezone.utc)
                if expires > now:
                    LOG.debug("URL %s not expired (%s > %s)", url, expires,
                              now)
                    return False
            except Exception:
                # Best effort: an unusable expiry date falls back to the
                # etag comparison below.
                LOG.exception("Failed to check URL expiry date '%s'",
                              cache_data["expires"])

        headers = self.headers(url)
        cached_etag = cache_data.get("etag")
        remote_etag = headers.get("etag")

        if cached_etag == remote_etag:
            LOG.debug("Remote content of URL %s unchanged", url)
            return False

        LOG.warning("Remote content of URL %s has changed", url)
        if (SETTINGS.get("download-updated-urls")
                or self.owner.update_if_out_of_date):
            LOG.warning(
                "Invalidating cache version and re-downloading %s",
                url)
            return True

        LOG.warning(
            "To enable automatic downloading of updated URLs set the 'download-updated-urls' setting to True",
        )
        return False
Exemple #10
0
 def __repr__(self):
     """Return ``ClassName(path,ReaderClassName)``.

     The cache directory part of ``self.path`` is abbreviated to ``CACHE:``.
     """
     root = SETTINGS.get("cache-directory")
     shortened = self.path.replace(root, "CACHE:")
     return "{}({},{})".format(
         self.__class__.__name__,
         shortened,
         self._reader.__class__.__name__,
     )
Exemple #11
0
 def __init__(self, kwargs):
     """Create the driver from the merged plotting options.

     Options are layered in increasing priority: the ``"plotting-options"``
     setting, ``OPTIONS``, then ``kwargs``.
     """
     merged = {}
     # Later layers override earlier ones.
     for layer in (SETTINGS.get("plotting-options", {}), OPTIONS, kwargs):
         merged.update(layer)
     self.driver = Driver(Options(merged))
Exemple #12
0
    def __init__(
            self,
            url,
            parts=None,
            filter=None,
            merger=None,
            verify=True,
            force=None,
            chunk_size=1024 * 1024,
            range_method="auto",
            http_headers=None,
            update_if_out_of_date=False,
            fake_headers=None,  # When HEAD is not allowed but you know the size
    ):
        """Set up a downloader for ``url`` and resolve the local path of its data.

        A ``Downloader`` is created for ``url``. If it reports a local path,
        that path is used directly. Otherwise the data is obtained through
        ``self.cache_file()``, which is given a callback that downloads into
        the target file, and the resulting path is stored on ``self.path``.

        Parameters
        ----------
        url :
            The URL to fetch; stored on ``self.url``.
        parts :
            Forwarded to the ``Downloader`` and included in the arguments
            given to ``self.cache_file()``.
        filter, merger :
            Forwarded to the parent class constructor.
        verify, chunk_size, range_method, http_headers, fake_headers :
            Forwarded unchanged to the ``Downloader``.
        force :
            Passed to ``self.cache_file()``; when ``None``,
            ``self.out_of_date`` is used instead.
        update_if_out_of_date :
            Stored on ``self.update_if_out_of_date``.
        """

        super().__init__(filter=filter, merger=merger)

        # TODO: re-enable this feature
        # While disabled, ``extension`` is always None here, so the
        # normalisation below is skipped and the downloader's extension is used.
        extension = None

        self.url = url
        self.parts = parts
        LOG.debug("URL %s", url)

        self.update_if_out_of_date = update_if_out_of_date

        self.downloader = Downloader(
            url,
            chunk_size=chunk_size,
            timeout=SETTINGS.get("url-download-timeout"),
            verify=verify,
            parts=parts,
            range_method=range_method,
            http_headers=http_headers,
            fake_headers=fake_headers,
            statistics_gatherer=record_statistics,
            progress_bar=progress_bar,
            resume_transfers=True,
            override_target_file=False,
            download_file_extension=".download",
        )

        # Make sure a user-supplied extension starts with a dot.
        if extension and extension[0] != ".":
            extension = "." + extension

        if extension is None:
            extension = self.downloader.extension()

        # If the downloader already knows a local path, use it and skip the cache.
        self.path = self.downloader.local_path()
        if self.path is not None:
            return

        # By default, let out_of_date() decide whether a cached copy is refreshed.
        if force is None:
            force = self.out_of_date

        # Callback for cache_file(): download into ``target`` and return the
        # downloader's cache data.
        def download(target, _):
            self.downloader.download(target)
            return self.downloader.cache_data()

        self.path = self.cache_file(
            download,
            dict(url=url, parts=parts),
            extension=extension,
            force=force,
        )
Exemple #13
0
def cache_file(
    owner: str,
    create,
    args,
    hash_extra=None,
    extension: str = ".cache",
    force=None,
    replace=None,
):
    """Creates a cache file in the climetlab cache-directory (defined in the :py:class:`Settings`).
    Uses :py:func:`_register_cache_file()`

    The file name is derived from a SHA-256 hash of ``owner``, ``args``,
    ``hash_extra`` and ``extension``, so the same inputs always map to the
    same path. The file is only (re)created when it does not exist, or when
    ``force`` requests it.

    Parameters
    ----------
    owner : str
        The owner of the cache file is generally the name of the source that generated the cache.
    create : callable
        Called as ``create(target, args)`` to write the content to the
        temporary file ``target``. Its return value is stored with the cache
        entry through ``update_entry()``.
    args :
        JSON-serialisable description of the content; part of the hash and
        passed to ``create`` and ``force``.
    hash_extra : optional
        Additional JSON-serialisable data that takes part in the hash only.
    extension : str, optional
        Extension filename (such as ".nc" for NetCDF, etc.), by default ".cache"
    force : bool or callable, optional
        When the file already exists: a truthy value discards it so that it
        is created again. A callable is invoked as
        ``force(args, path, owner_data)`` and its result is used instead.
    replace : optional
        Path of a file to be replaced; ignored unless it lies in the cache
        directory. It is not used further in this function.

    Returns
    -------
    path : str
        Full path to the cache file.
    """

    # NOTE: the hash defines the cache key; do not change what goes into it.
    m = hashlib.sha256()
    m.update(owner.encode("utf-8"))
    m.update(json.dumps(args, sort_keys=True).encode("utf-8"))
    m.update(json.dumps(hash_extra, sort_keys=True).encode("utf-8"))
    m.update(json.dumps(extension, sort_keys=True).encode("utf-8"))

    if replace is not None:
        # Don't replace files that are not in the cache
        if not file_in_cache_directory(replace):
            replace = None

    path = os.path.join(
        SETTINGS.get("cache-directory"),
        f"{owner.lower()}-{m.hexdigest()}{extension}",
    )

    record = register_cache_file(path, owner, args)
    if os.path.exists(path):
        if callable(force):
            owner_data = record["owner_data"]
            if owner_data is not None:
                owner_data = json.loads(owner_data)
            force = force(args, path, owner_data)

        if force:
            decache_file(path)

    if not os.path.exists(path):

        # Write to a name unique to this process and thread, then move it in
        # place, so that readers never see a partially written file.
        tmp = f".{os.getpid()}-{threading.get_ident()}.tmp"

        owner_data = create(path + tmp, args)

        # os.replace() overwrites an existing target on every platform,
        # whereas os.rename() fails on Windows when another process or
        # thread created the same file in the meantime.
        os.replace(path + tmp, path)

        update_entry(path, owner_data)

        check_cache_size()

    return path
Exemple #14
0
 def _cache_directory(self):
     """Return the current value of the ``cache-directory`` setting."""
     return SETTINGS.get("cache-directory")
Exemple #15
0
 def _file_in_cache_directory(self, path):
     """Tell whether ``path`` is the cache directory or lies inside it.

     Parameters
     ----------
     path : str
         The path to test. It is compared textually, without normalisation,
         against the ``cache-directory`` setting.

     Returns
     -------
     bool
         ``True`` if ``path`` equals the cache directory or starts with the
         cache directory followed by a path separator.
     """
     import os

     cache_directory = SETTINGS.get("cache-directory")
     # A bare prefix test would also accept siblings that merely share the
     # prefix, such as "<cache>-old/file"; require a separator after the
     # directory name. os.path.join(x, "") appends one only if it is missing.
     return path == cache_directory or path.startswith(
         os.path.join(cache_directory, ""))
Exemple #16
0
    )

    record = register_cache_file(path, owner, args)
    if os.path.exists(path):
        if callable(force):
            owner_data = record["owner_data"]
            if owner_data is not None:
                owner_data = json.loads(owner_data)
            force = force(args, path, owner_data)

        if force:
            decache_file(path)

    if not os.path.exists(path):

        tmp = ".{}-{}.tmp".format(os.getpid(), threading.get_ident())

        owner_data = create(path + tmp, args)

        os.rename(path + tmp, path)

        update_entry(path, owner_data)

        check_cache_size()

    return path


# housekeeping()
# Hand settings_changed to SETTINGS.on_change at import time.
# NOTE(review): presumably it is then called whenever a setting is modified;
# confirm against the Settings implementation.
SETTINGS.on_change(settings_changed)
Exemple #17
0
 def settings(self, name):
     """Look up ``name`` in the global ``SETTINGS`` and return its value."""
     value = SETTINGS.get(name)
     return value
Exemple #18
0
 def __repr__(self):
     """Return ``ClassName(path)``.

     When ``path`` is a string, its cache directory part is abbreviated to
     ``CACHE:``; a missing ``path`` attribute is shown as ``None``.
     """
     cache_dir = SETTINGS.get("cache-directory")
     path = getattr(self, "path", None)
     shown = path.replace(cache_dir, "CACHE:") if isinstance(path, str) else path
     return "{}({})".format(self.__class__.__name__, shown)