Example #1
async def publish_changed_directories(
        old_commit: Commit,
        new_commit: Commit,
        publisher: TopicPublisher,
        file_extensions: Optional[List[str]] = None):
    """
    publishes policy topics for directories affected by the change between the two commits,
    prompting the client to ask for the *full* contents of these directories (and not just the diff).
    if the two commits are identical (e.g. on the first run), all directories in the repo are published.
    """
    if new_commit == old_commit:
        return await publish_all_directories_in_repo(
            old_commit,
            new_commit,
            publisher=publisher,
            file_extensions=file_extensions)

    with DiffViewer(old_commit, new_commit) as viewer:

        def has_extension(path: Path) -> bool:
            if not file_extensions:
                return True
            return path.suffix in file_extensions

        all_paths = list(viewer.affected_paths(has_extension))
        if not all_paths:
            logger.warning(
                f"new commits detected but no tracked files were affected: '{old_commit.hexsha}' -> '{new_commit.hexsha}'",
                old_commit=old_commit,
                new_commit=new_commit)
            return
        directories = PathUtils.intermediate_directories(all_paths)
        logger.info("Publishing policy update, directories: {directories}",
                    directories=[str(d) for d in directories])
        topics = policy_topics(directories)
        publisher.publish(topics=topics, data=new_commit.hexsha)
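The helpers used above (and in publish_all_directories_in_repo further down) are not shown in these snippets. The following is a minimal, illustrative sketch of what PathUtils.intermediate_directories and policy_topics might do, assuming each directory maps to one pub/sub topic; the "policy:" prefix and both bodies are assumptions, not OPAL's actual implementation.

from pathlib import Path
from typing import List, Set


def intermediate_directories(paths: List[Path]) -> List[Path]:
    # collect every parent directory of every affected file, e.g. the path
    # 'rbac/roles/admin.rego' contributes 'rbac' and 'rbac/roles'
    dirs: Set[Path] = set()
    for path in paths:
        dirs.update(parent for parent in path.parents if parent != Path("."))
    return sorted(dirs)


def policy_topics(directories: List[Path]) -> List[str]:
    # one topic per directory; the prefix is illustrative only
    return [f"policy:{directory}" for directory in directories]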
Example #2
    def _verify_crypto_keys(self):
        """
        verifies whether or not valid crypto keys were provided to the signer.
        if both keys are valid, encodes and decodes a JWT to make sure the keys match.

        if both private and public keys are valid and are matching => signer is enabled
        if both private and public keys are None => signer is disabled (self._enabled == False)
        if only one key is valid/not-None => throws ValueError
        any other case => throws ValueError
        """
        if self._private_key is not None and self._public_key is not None:
            # both keys provided, let's make sure these keys were generated correctly
            token = jwt.encode({"some": "payload"}, self._private_key, algorithm=self._algorithm)
            try:
                jwt.decode(token, self._public_key, algorithms=[self._algorithm])
            except jwt.PyJWTError as exc:
                logger.info("JWT Signer key verification failed with error: {err}", err=exc)
                raise InvalidJWTCryptoKeysException("private key and public key do not match!") from exc
            # save jwk
            self._jwk: PyJWK = PyJWK.from_json(self.get_jwk(), algorithm=self._algorithm)
        elif (self._private_key != self._public_key) and (self._private_key is None or self._public_key is None):
            raise ValueError("JWT Signer not valid, only one of private key / public key pair was provided!")
        elif self._private_key is None and self._public_key is None:
            # valid situation, running in dev mode and api security is off
            self._enabled = False
            logger.info("OPAL was not provided with JWT encryption keys, cannot verify api requests!")
        else:
            raise ValueError("Invalid JWT Signer input!")
Example #3
 async def get_data_sources_config(
         authorization: Optional[str] = Header(None)):
     """
     Provides OPAL clients with their base data config, i.e. where they should
     fetch a *complete* picture of the policy data they need. Clients use this config
     to pull all data when they initially load and when they reconnect to the server
     after a period of disconnection (during which they cannot receive incremental updates).
     """
     token = get_token_from_header(authorization)
     if data_sources_config.config is not None:
         logger.info("Serving source configuration")
         return data_sources_config.config
     elif data_sources_config.external_source_url is not None:
         url = str(data_sources_config.external_source_url)
         short_token = token[:5] + "..." + token[-5:]
         logger.info(
             "Source configuration is available at '{url}', redirecting with token={token} (abbrv.)",
             url=url,
             token=short_token)
         redirect_url = set_url_query_param(url, 'token', token)
         return RedirectResponse(url=redirect_url)
     else:
         logger.error("pydantic model invalid", model=data_sources_config)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Did not find a data source configuration!")
Example #4
    def publish_data_updates(self, update: DataUpdate):
        """
        Notify OPAL subscribers of a new data update, topic by topic.

        Args:
            update (DataUpdate): the update event; each entry carries the topics
                (with hierarchy) to notify subscribers of, and the data-source
                configuration from which subscribers should fetch the data.
        """
        all_topic_combos = []
        # Expand the topics for each event to include sub topic combos (e.g. publish 'a/b/c' as 'a' , 'a/b', and 'a/b/c')
        for entry in update.entries:
            for topic in entry.topics:
                topic_combos = self.get_topic_combos(topic)
                all_topic_combos.extend(topic_combos)

        # a nicer format of entries to the log
        logged_entries = [(entry.url, entry.save_method, entry.dst_path or "/")
                          for entry in update.entries]

        # publish all topics with all their sub combinations
        logger.info(
            "[{pid}] Publishing data update to topics: {topics}, reason: {reason}, entries: {entries}",
            pid=os.getpid(),
            topics=all_topic_combos,
            reason=update.reason,
            entries=logged_entries,
        )
        self._publisher.publish(all_topic_combos, update)
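The comment above refers to get_topic_combos, whose implementation is not shown here. A minimal sketch of the described expansion (an assumption consistent with the comment, not the actual method) could look like:

from typing import List


def get_topic_combos(topic: str) -> List[str]:
    # expand 'a/b/c' into ['a', 'a/b', 'a/b/c'] so subscribers of any
    # prefix of the topic hierarchy receive the update
    parts = topic.split("/")
    return ["/".join(parts[:i + 1]) for i in range(len(parts))]


assert get_topic_combos("a/b/c") == ["a", "a/b", "a/b/c"]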
Example #5
    async def start_server_background_tasks(self):
        """
        starts the background processes (as asyncio tasks) if they are configured.

        all workers will start these tasks:
        - publisher: a pub/sub client used to publish updates to OPAL clients.

        only the leader worker (the first to obtain the leadership lock) will start these tasks:
        - (repo) watcher: monitors the policy git repository for changes.
        """
        if self.publisher is not None:
            async with self.publisher:
                if self.watcher is not None:
                    # repo watcher is enabled, but we want only one worker to run it
                    # (otherwise for each new commit, we will publish multiple updates via pub/sub).
                    # leadership is determined by the first worker to obtain a lock
                    self.leadership_lock = NamedLock(
                        opal_server_config.LEADER_LOCK_FILE_PATH)
                    async with self.leadership_lock:
                        # only one worker gets here, the others block. in case the leader worker
                        # is terminated, another one will obtain the lock and become leader.
                        logger.info(
                            "leadership lock acquired, leader pid: {pid}",
                            pid=os.getpid())
                        logger.info(
                            "listening on webhook topic: '{topic}'",
                            topic=opal_server_config.POLICY_REPO_WEBHOOK_TOPIC)
                        # the leader listens to the webhook topic (webhook api route can be hit randomly in all workers)
                        # and triggers the watcher to check for changes in the tracked upstream remote.
                        await self.pubsub.endpoint.subscribe(
                            [opal_server_config.POLICY_REPO_WEBHOOK_TOPIC],
                            partial(trigger_repo_watcher_pull, self.watcher))
                        # running the watcher, and waiting until it stops (until self.watcher.signal_stop() is called)
                        async with self.watcher:
                            await self.watcher.wait_until_should_stop()
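NamedLock itself is not shown in these snippets. As an illustration of the leadership mechanism (the first worker to lock a shared file proceeds, the rest block until the holder exits), here is a minimal file-lock sketch; it is synchronous and purely illustrative, not OPAL's actual NamedLock, which is used above as an async context manager.

import fcntl


class FileLeadershipLock:
    """illustrative only: exclusive advisory lock on a shared file path"""

    def __init__(self, path: str):
        self._path = path
        self._file = None

    def __enter__(self):
        self._file = open(self._path, "w")
        # blocks until this process is the one holding the exclusive lock
        fcntl.flock(self._file, fcntl.LOCK_EX)
        return self

    def __exit__(self, *exc):
        fcntl.flock(self._file, fcntl.LOCK_UN)
        self._file.close()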
Example #6
 def start(self):
     """
     starts the repo watcher and registers a failure callback to terminate gracefully
     """
     logger.info("Launching repo watcher")
     self._watcher.on_git_failed(self._fail)
     self._tasks.append(asyncio.create_task(self._watcher.run()))
     self._init_should_stop()
Example #7
 async def log_client_update_report(report: DataUpdateReport):
     """
     A data update callback to be called by the OPAL client after completing an update.
     If the user deploying OPAL-client did not set OPAL_DEFAULT_UPDATE_CALLBACKS properly,
     this method will be called as the default callback (will simply log the report).
     """
     logger.info("Recieved update report: {report}", report=report.dict())
     return {}  # simply returns 200
Example #8
async def trigger_repo_watcher_pull(watcher: RepoWatcherTask, topic: Topic,
                                    data: Any):
    """
    triggers the policy watcher to check for changes.
    the check runs as a task on the watcher's thread.
    """
    logger.info("webhook listener triggered")
    watcher.trigger()
Example #9
 async def stop(self):
     """
     stops the publisher (cancels any running publishing tasks)
     """
     logger.info("stopping topic publisher")
     for task in self._tasks:
         if not task.done():
             task.cancel()
     await asyncio.gather(*self._tasks, return_exceptions=True)
Example #10
 def _trigger_shutdown(self):
     """
     this will send SIGTERM (the termination signal) to the worker, making uvicorn
     send the "lifespan.shutdown" event to Starlette via the ASGI lifespan interface.
     Starlette will then trigger the @app.on_event("shutdown") callback, which
     in our case (self.stop_client_background_tasks()) will gracefully shut down
     the background processes and only then terminate the worker.
     """
     logger.info("triggering shutdown with SIGTERM...")
     os.kill(os.getpid(), signal.SIGTERM)
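The callback chain described in the docstring can be summarized with a small sketch of how such a shutdown hook is typically registered on a FastAPI app; the function name, the client parameter, and the handler body are placeholders, not the actual wiring code.

from fastapi import FastAPI


def register_shutdown_hook(app: FastAPI, client) -> None:
    # Starlette invokes this handler when uvicorn delivers the ASGI
    # "lifespan.shutdown" event, e.g. after the SIGTERM sent above
    @app.on_event("shutdown")
    async def shutdown_event():
        await client.stop_client_background_tasks()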
Example #11
 async def stop(self):
     """
     stops all repo watcher tasks
     """
     logger.info("Stopping repo watcher")
     await self._watcher.stop()
     for task in self._tasks:
         if not task.done():
             task.cancel()
     await asyncio.gather(*self._tasks, return_exceptions=True)
Example #12
 def start(self):
     """
     starts the pub/sub client and subscribes to the predefined topics.
     the client will keep attempting to connect to the pub/sub server until successful.
     """
     logger.info("started topic listener, topics={topics}",
                 topics=self._topics)
     for topic in self._topics:
         self._client.subscribe(topic, self._callback)
     self._client.start_client(f"{self._server_uri}")
Example #13
 async def handle_url(self, url, config):
     """
     Helper function wrapping self._engine.handle_url
     """
     logger.info("Fetching data from url: {url}", url=url)
     try:
         # ask the engine to get our data
         response = await self._engine.handle_url(url, config=config)
         return response
     except asyncio.TimeoutError as e:
         logger.exception("Timeout while fetching url: {url}", url=url)
         raise
Example #14
    def _attempt_init_from_local_repo(self) -> CloneResult:
        """
        inits the repo from local .git or throws GitFailed
        """
        logger.info("Repo already exists in '{repo_path}'", repo_path=self.path)
        try:
            repo = Repo(self.path)
        except Exception as e:
            logger.exception("cannot init local repo: {error}", error=e)
            raise GitFailed(e)

        return LocalClone(repo)
Example #15
 async def _handle_url(self, url, config):
     """
     Helper function wrapping self._engine.handle_url, returning the fetched result with the url used for it
     """
     logger.info("Fetching data from url: {url}", url=url)
     try:
         # ask the engine to get our data
         response = await self._engine.handle_url(url, config=config)
         # store as part of all results
         return url, response
     except asyncio.TimeoutError as e:
         logger.exception("Timeout while fetching url: {url}", url=url)
         raise
Example #16
 async def launch_policy_store_dependent_tasks(self):
     try:
         for task in asyncio.as_completed([self.launch_policy_updater(), self.launch_data_updater()]):
             await task
     except websockets.exceptions.InvalidStatusCode as err:
         logger.error("Failed to launch background task -- {err}", err=err)
         logger.info("triggering shutdown with SIGTERM...")
          # this will send SIGTERM (the termination signal) to the worker, making uvicorn
          # send the "lifespan.shutdown" event to Starlette via the ASGI lifespan interface.
          # Starlette will then trigger the @app.on_event("shutdown") callback, which
          # in our case (self.stop_client_background_tasks()) will gracefully shut down
          # the background processes and only then terminate the worker.
         os.kill(os.getpid(), signal.SIGTERM)
Example #17
 def clone(self) -> CloneResult:
     """
     initializes a git.Repo and returns the clone result.
     it either:
          - does not find a cloned repo locally and clones from the remote url
          - finds a cloned repo locally and does not clone from the remote.
     """
     logger.info("Cloning repo from '{url}' to '{to_path}'", url=self.url, to_path=self.path)
     git_path = Path(self.path) / Path(".git")
     if git_path.exists():
         return self._attempt_init_from_local_repo()
     else:
         return self._attempt_clone_from_url()
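LocalClone and RemoteClone, returned by the surrounding helpers, are not defined in these snippets. A plausible sketch, consistent with the result.cloned_from_remote check used in run() further down but not necessarily OPAL's actual class hierarchy:

from git import Repo


class CloneResult:
    cloned_from_remote: bool = False

    def __init__(self, repo: Repo):
        self.repo = repo


class LocalClone(CloneResult):
    # the repo already existed on disk, nothing was fetched
    cloned_from_remote = False


class RemoteClone(CloneResult):
    # the repo was freshly cloned from the remote url
    cloned_from_remote = True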
Example #18
    async def trigger_git_webhook(
        request: Request, urls: List[str] = Depends(affected_repo_urls)):
        event = request.headers.get('X-GitHub-Event', 'ping')

        if opal_server_config.POLICY_REPO_URL is not None and opal_server_config.POLICY_REPO_URL in urls:
            logger.info("triggered webhook on repo: {repo}",
                        repo=urls[0],
                        hook_event=event)
            if event == 'push':
                # notifies the webhook listener via the pubsub broadcaster
                await pubsub_endpoint.publish(
                    opal_server_config.POLICY_REPO_WEBHOOK_TOPIC)
            return {"status": "ok", "event": event, "repo_url": urls[0]}

        return {"status": "ignored", "event": event}
Example #19
async def publish_all_directories_in_repo(
        old_commit: Commit,
        new_commit: Commit,
        publisher: TopicPublisher,
        file_extensions: Optional[List[str]] = None):
    """
    publishes policy topics matching all relevant directories in tracked repo,
    prompting the client to ask for *all* contents of these directories (and not just diffs).
    """
    with CommitViewer(new_commit) as viewer:
        filter = partial(has_extension, extensions=file_extensions)
        all_paths = list(viewer.files(filter))
        directories = PathUtils.intermediate_directories(all_paths)
        logger.info("Publishing policy update, directories: {directories}",
                    directories=[str(d) for d in directories])
        topics = policy_topics(directories)
        publisher.publish(topics=topics, data=new_commit.hexsha)
Example #20
 def _attempt_clone_from_url(self) -> CloneResult:
     """
     clones the repo from url or throws GitFailed
     """
     env = self._provide_git_ssh_environment()
     _clone_func = partial(Repo.clone_from, url=self.url, to_path=self.path, env=env)
     _clone_with_retries = retry(**self._retry_config)(_clone_func)
     try:
         repo = _clone_with_retries()
      except (GitError, GitCommandError, RetryError) as e:
          logger.exception("cannot clone policy repo: {error}", error=e)
          raise GitFailed(e)
     else:
         logger.info("Clone succeeded", repo_path=self.path)
         return RemoteClone(repo)
Example #21
 async def websocket_rpc_endpoint(websocket: WebSocket,
                                  logged_in: bool = Depends(verifier)):
     """
      this is the main websocket endpoint the sidecar uses to register for policy updates.
      this endpoint is protected by an HTTP Authorization Bearer token.
     """
     if not logged_in:
         logger.info(
             "Closing connection, remote address: {remote_address}",
             remote_address=websocket.client,
             reason="Authentication failed")
         await websocket.close()
         return
     # Init PubSub main-loop with or without broadcasting
     if broadcaster_uri is not None:
         async with self.endpoint.broadcaster:
             await self.endpoint.main_loop(websocket)
     else:
         await self.endpoint.main_loop(websocket)
Example #22
    async def stop_client_background_tasks(self):
        """
        stops all background tasks (called on shutdown event)
        """
        logger.info("stopping background tasks...")

        # stopping opa runner
        if self.opa_runner:
            await self.opa_runner.stop()

        # stopping updater tasks (each updater runs a pub/sub client)
        logger.info("trying to shutdown DataUpdater and PolicyUpdater gracefully...")
        tasks: List[asyncio.Task] = []
        if self.data_updater:
            tasks.append(asyncio.create_task(self.data_updater.stop()))
        if self.policy_updater:
            tasks.append(asyncio.create_task(self.policy_updater.stop()))

        try:
            await asyncio.gather(*tasks)
        except Exception:
            logger.exception("exception while shutting down updaters")
Example #23
    async def run(self):
        """
        clones the repo and potentially starts the polling task
        """
        try:
            result = self._cloner.clone()
        except GitFailed as e:
            await self._on_git_failed(e)
            return

        self._tracker = BranchTracker(repo=result.repo,
                                      branch_name=self._branch_name,
                                      remote_name=self._remote_name)
        # if the repo exists locally, we need to git pull when the watcher starts
        if not result.cloned_from_remote:
            self._tracker.pull()

        if self._polling_interval > 0:
            logger.info("Launching polling task, interval: {interval} seconds",
                        interval=self._polling_interval)
            self._start_polling_task()
        else:
            logger.info("Polling task is off")
Example #24
 async def check_for_changes(self):
     """
     calling this method will trigger a git pull from the tracked remote.
     if after the pull the watcher detects new commits, it will call the
     callbacks registered with on_new_commits().
     """
     logger.info("Pulling changes from remote: '{remote}'",
                 remote=self._tracker.tracked_remote.name)
     has_changes, prev, latest = self._tracker.pull()
     if not has_changes:
         logger.info("No new commits: HEAD is at '{head}'",
                     head=latest.hexsha)
     else:
         logger.info(
             "Found new commits: old HEAD was '{prev_head}', new HEAD is '{new_head}'",
             prev_head=prev.hexsha,
             new_head=latest.hexsha)
         await self._on_new_commits(old=prev, new=latest)
Example #25
 async def stop(self):
     """
     stops the pubsub client
     """
     await self._client.disconnect()
     logger.info("stopped topic listener", topics=self._topics)
Example #26
 async def shutdown_event():
     logger.info("triggered shutdown event")
     if self.watcher is not None:
         self.watcher.signal_stop()
     if self.publisher is not None:
         asyncio.create_task(self.publisher.stop())
Example #27
 async def startup_event():
     logger.info("triggered startup event")
     asyncio.create_task(self.start_server_background_tasks())
Example #28
 async def trigger_policy_update():
     logger.info("triggered policy update from api")
     await update_policy(policy_store)
     return {"status": "ok"}
Example #29
 def start(self):
     """
     starts the publisher
     """
     logger.info("started topic publisher")
Example #30
 async def trigger_policy_update():
     logger.info("triggered policy update from api")
     await policy_updater.update_policy(force_full_update=True)
     return {"status": "ok"}