Example #1
    def _start_ssh(self, stdout_writer: int,
                   suppress_stderr: bool) -> subprocess.Popen:
        """Start the SSH process to run the remote outrun instance."""
        try:
            # Set up command to run to start remote session
            outrun_command = self._compose_remote_outrun_command()
            ssh_command = self._compose_ssh_command(outrun_command)

            def preexec_fn() -> None:
                # Terminate ssh if outrun is terminated
                self._set_death_signal(signal.SIGTERM)

            # Start SSH session that invokes outrun on the remote
            log.debug(f"running {ssh_command}")

            ssh = subprocess.Popen(
                ssh_command,
                # Proxy stdout to token skimmer
                stdout=stdout_writer,
                # Conditionally capture stderr
                stderr=subprocess.PIPE if suppress_stderr else None,
                preexec_fn=preexec_fn,
            )

            return ssh
        except Exception as e:
            # Chain the original exception so the underlying cause stays visible
            raise RuntimeError(f"failed to start ssh: {e}") from e
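
A note on the death signal used above: `_set_death_signal` is not shown in this
example. A minimal sketch of such a helper, assuming Linux and the prctl(2)
PR_SET_PDEATHSIG option via ctypes (the constant, the libc name, and the function
name below are illustrative assumptions, not taken from the example):

import ctypes

PR_SET_PDEATHSIG = 1  # option value from <sys/prctl.h> (Linux only)

def set_death_signal(sig: int) -> None:
    """Ask the kernel to deliver `sig` to this process when its parent dies."""
    # Hedged sketch: load glibc and call prctl(2); raise if the call fails
    libc = ctypes.CDLL("libc.so.6", use_errno=True)
    if libc.prctl(PR_SET_PDEATHSIG, sig) != 0:
        raise OSError(ctypes.get_errno(), "prctl(PR_SET_PDEATHSIG) failed")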
Example #2
        def wrapper(*args, **kwargs):
            # Support coverage.py within FUSE threads.
            if hasattr(threading, "_trace_hook"):
                sys.settrace(getattr(threading, "_trace_hook"))

            try:
                res = fn(*args, **kwargs)

                if res is None:
                    res = 0

                return res
            except OSError as e:
                # FUSE expects an error to be returned as negative errno.
                if e.errno:
                    return -e.errno
                else:
                    return -errno.EIO
            except NotImplementedError:
                log.debug(f"fuse::{name}() not implemented!")

                return -errno.ENOSYS
            except Exception:
                log.warning(f"fuse::{name}() raised an unexpected exception:")
                log.warning(traceback.format_exc())

                return -errno.EIO
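
The `wrapper` above closes over `fn` and `name`, which suggests it is produced by a
decorator or factory applied to each FUSE operation. A hedged sketch of such a
factory and its use (the names `translate_fuse_errors` and `Operations` are
illustrative, and the coverage.py trace hook from the example is omitted here):

import errno
import functools
import logging
import traceback

log = logging.getLogger(__name__)

def translate_fuse_errors(fn):
    """Wrap a FUSE operation so that exceptions become negative errno codes."""
    name = fn.__name__

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            res = fn(*args, **kwargs)
            # FUSE expects 0 for success when the operation returns nothing
            return 0 if res is None else res
        except OSError as e:
            return -(e.errno or errno.EIO)
        except NotImplementedError:
            log.debug(f"fuse::{name}() not implemented!")
            return -errno.ENOSYS
        except Exception:
            log.warning(f"fuse::{name}() raised an unexpected exception:")
            log.warning(traceback.format_exc())
            return -errno.EIO

    return wrapper

class Operations:  # illustrative usage only
    @translate_fuse_errors
    def getattr(self, path: str):
        raise NotImplementedError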
Example #3
    def sync(self) -> None:
        """
        Synchronize the cache with the current state of the local machine.

        This is implemented by sending all cached metadata back to the local machine and
        having it compare the entries with the current metadata on disk. Any entries
        that have meaningfully changed (everything aside from the last access timestamp)
        are
        returned and updated. Additionally, the local machine is informed of the
        contents in the remote cache to ensure that there are no superfluous prefetches.

        This sounds inefficient, but in practice it is much faster than checking the
        freshness of cache entries one-by-one upon first access, because it avoids
        paying the round-trip latency overhead for each individual entry.
        """
        cached_metadata = {
            entry.path: entry.meta
            for key, entry in self._entries.items()
            if key.startswith(self._machine_id)
        }

        if len(cached_metadata) == 0:
            return

        changed_metadata: Dict[str, Metadata]
        changed_metadata = self._client.get_changed_metadata(cached_metadata)

        for path, new_metadata in changed_metadata.items():
            log.debug(f"updating metadata cache for {path}")

            with self._lock_entry(path) as entry:  # type: CacheEntry
                entry.meta = new_metadata
                entry.last_update = time.time()

                if entry.contents:
                    # Delete cached contents if the entry no longer refers to an
                    # existing regular file
                    if entry.meta.error:
                        entry.contents = None
                    elif entry.meta.attr and not stat.S_ISREG(entry.meta.attr.st_mode):
                        entry.contents = None
                    else:
                        entry.contents.dirty = True

        # In addition to syncing metadata, also mark content as having been cached
        if self._prefetch:
            self._client.mark_previously_fetched_contents(
                [
                    entry.path
                    for entry in self._entries.values()
                    if entry.contents and not entry.contents.dirty
                ]
            )
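
For context, the local side of `get_changed_metadata` only has to diff the submitted
metadata against a fresh read from disk. A hedged, self-contained sketch of that
idea; `FileMeta`, `read_meta`, and their fields are illustrative stand-ins rather
than outrun's actual `Metadata` type:

import os
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class FileMeta:  # illustrative stand-in for the cached metadata
    error: Optional[int] = None
    size: int = 0
    mtime: float = 0.0
    mode: int = 0

def read_meta(path: str) -> FileMeta:
    """Read the current metadata of a path, recording the errno on failure."""
    try:
        st = os.lstat(path)
        return FileMeta(size=st.st_size, mtime=st.st_mtime, mode=st.st_mode)
    except OSError as e:
        return FileMeta(error=e.errno)

def get_changed_metadata(cached: Dict[str, FileMeta]) -> Dict[str, FileMeta]:
    """Return only the entries whose on-disk metadata meaningfully differs."""
    changed = {}
    for path, old in cached.items():
        new = read_meta(path)
        # The last access time is not part of FileMeta, so it never triggers a change
        if new != old:
            changed[path] = new
    return changed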
Example #4
        def fn(*args: Any) -> Any:
            """
            Call wrapped remote function with the given arguments.

            Serializes the arguments, makes the call and deserializes the resulting
            return value or raises the resulting exception.

            ZeroMQ connections are stateless so the token is sent again with every call.
            """
            sock = self._socket()

            t_call = time.time()

            # Serialize arguments and invoke remote function
            call = self._encoding.pack((self.token, name, *args))
            sock.send(call)

            # Wait for answer (return value, exception, token error, or RPC error)
            try:
                typ, *ret = self._encoding.unpack(sock.recv())
            except zmq.ZMQError:
                raise IOError("rpc call timed out")

            t_return = time.time()

            # Explicit check before logging because _summarize_args is relatively slow
            if log.isEnabledFor(logging.DEBUG):
                t_millis = round((t_return - t_call) * 1000)
                log.debug(
                    f"rpc::{name}{self._summarize_args(args)} - {t_millis} ms")

            if typ == ReturnType.NORMAL.value:
                if len(ret) == 1:
                    return ret[0]
                else:
                    return ret
            elif typ == ReturnType.EXCEPTION.value:
                raise ret[0]
            elif typ == ReturnType.TOKEN_ERROR.value:
                raise InvalidTokenError(
                    "token mismatch between client and server")
            else:
                raise ValueError(f"unexpected return type {typ}")
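
The reply handling above implies a small protocol: the first element of the unpacked
message is a `ReturnType` value, followed by the payload. A hedged sketch of a
server-side counterpart; the enum values and the `handle_call` dispatcher are
assumptions for illustration, not outrun's actual server code:

from enum import Enum
from typing import Any, Callable, Dict, Tuple

class ReturnType(Enum):  # numeric values are illustrative assumptions
    NORMAL = 0
    EXCEPTION = 1
    TOKEN_ERROR = 2

def handle_call(expected_token: str,
                functions: Dict[str, Callable[..., Any]],
                payload: Tuple[Any, ...]) -> Tuple[Any, ...]:
    """Dispatch one unpacked call and build a reply the client above can unpack."""
    token, name, *args = payload

    if token != expected_token:
        return (ReturnType.TOKEN_ERROR.value,)

    try:
        return (ReturnType.NORMAL.value, functions[name](*args))
    except Exception as e:
        # The exception object itself is sent back and re-raised by the client
        return (ReturnType.EXCEPTION.value, e)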
Example #5
    def _try_store_prefetches(self, path: str,
                              prefetches: List[PrefetchEntry]) -> None:
        """
        Create cache entries for prefetched entries if possible.

        Prefetched entries are saved on a best-effort basis depending on whether a lock
        can be acquired for the cache entry. This is to prevent deadlocks from occurring
        when multiple threads are trying to save overlapping prefetched data.

        There is one exception to this rule: when we are prefetching data for the file
        that originally triggered the prefetching. In that case the entry should already
        be locked by the calling function, so the failure to acquire a lock here can be
        safely ignored.
        """
        for prefetch in prefetches:
            key = self._entry_key(prefetch.path)

            with self._entry_locks.lock(key, False) as acquired:
                if not acquired and prefetch.path != path:
                    continue

                if prefetch.contents:
                    log.debug(
                        f"storing prefetched contents for {prefetch.path}")
                else:
                    log.debug(
                        f"storing prefetched metadata for {prefetch.path}")

                # Create cache entry for prefetch if it doesn't exist yet and assign
                # special timestamp to indicate that it has not been accessed yet.
                if key not in self._entries:
                    self._entries[key] = CacheEntry(path=prefetch.path,
                                                    meta=prefetch.metadata,
                                                    last_access=0)

                entry = self._entries[key]

                # If the prefetch contains contents and we don't have clean (non-dirty)
                # cached contents yet, then save them.
                if prefetch.contents and (not entry.contents
                                          or entry.contents.dirty):
                    entry.contents = self._save_contents(prefetch.contents)
Example #6
    def save(self, merge_disk_cache: bool = True) -> None:
        """
        Update the disk cache from the in-memory cache.

        If there is already a disk cache then it is read first to merge any new entries
        into the in-memory cache. This handles the case where a different outrun session
        has written new cache entries to disk in the background. Conflicts are handled
        by keeping the most recently updated entry.

        The LRU cleanup runs after this merge has completed and deletes entries and
        cached contents until the cache is below the specified limits again.

        Cached content files that are no longer referenced by any cache entry after this
        merge and cleanup are deleted from disk.
        """
        with fasteners.InterProcessLock(self._cache_lock_path):
            if merge_disk_cache:
                # Load latest cache entries in disk cache
                disk_entries = {}

                try:
                    disk_entries = self._read_disk_entries()
                except FileNotFoundError:
                    log.debug("no disk cache to merge with")
                except Exception as e:
                    log.error(f"not merging with existing disk cache: {e}")

                # Merge them with in-memory cache
                for key, disk_entry in disk_entries.items():
                    if disk_entry.newer_than(self._entries.get(key)):
                        self._entries[key] = disk_entry

            # LRU pass
            self._lru_cleanup()

            # Delete cached contents that are no longer referenced
            self._garbage_collect_blobs()

            with open(self._cache_index_path, "w") as f:
                self._encoding.dump_json(self._entries, f)
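
The merge above hinges on `CacheEntry.newer_than()`. A minimal sketch of what that
comparison could look like, assuming entries carry a `last_update` timestamp as in
Example #3; the reduced `CacheEntry` below is illustrative, not the real class:

import time
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class CacheEntry:  # reduced, illustrative version of the real cache entry
    path: str
    last_update: float = field(default_factory=time.time)

    def newer_than(self, other: Optional["CacheEntry"]) -> bool:
        """Prefer this entry if the other is missing or was updated earlier."""
        if other is None:
            return True
        return self.last_update > other.last_update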