Example #1
    def __init__(self,
                 token: str = None,
                 pubsub_url: str = None,
                 data_sources_config_url: str = None,
                 fetch_on_connect: bool = True,
                 data_topics: List[str] = None,
                 policy_store: BasePolicyStoreClient = None,
                 should_send_reports: bool = None):
        """
        Keeps policy-stores (e.g. OPA) up to date with relevant data.
        Obtains the data configuration on startup from the OPAL server.
        Uses Pub/Sub to subscribe to data update events, and fetches data from the sources (using the FetchingEngine).

        Args:
            token (str, optional): Auth token to include in connections to OPAL server. Defaults to CLIENT_TOKEN.
            pubsub_url (str, optional): URL for Pub/Sub updates for data. Defaults to OPAL_SERVER_PUBSUB_URL.
            data_sources_config_url (str, optional): URL to retrieve the base data configuration from. Defaults to DEFAULT_DATA_SOURCES_CONFIG_URL.
            fetch_on_connect (bool, optional): Should the updater fetch basic data immediately upon connection/reconnection. Defaults to True.
            data_topics (List[str], optional): Topics of data to fetch and subscribe to. Defaults to DATA_TOPICS.
            policy_store (BasePolicyStoreClient, optional): Policy store client to use to store data. Defaults to DEFAULT_POLICY_STORE.
            should_send_reports (bool, optional): Whether to report on completed data updates. Defaults to SHOULD_REPORT_ON_DATA_UPDATES.
        """
        # Defaults
        token: str = token or opal_client_config.CLIENT_TOKEN
        pubsub_url: str = pubsub_url or opal_client_config.SERVER_PUBSUB_URL
        data_sources_config_url: str = data_sources_config_url or opal_client_config.DEFAULT_DATA_SOURCES_CONFIG_URL
        # Should the client use the default data source to fetch on connect
        self._fetch_on_connect = fetch_on_connect
        # The policy store we'll save data updates into
        self._policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER()
        # Pub/Sub topics we subscribe to for data updates
        self._data_topics = data_topics if data_topics is not None else opal_client_config.DATA_TOPICS
        self._should_send_reports = should_send_reports if should_send_reports is not None else opal_client_config.SHOULD_REPORT_ON_DATA_UPDATES
        # The pub/sub client for data updates
        self._client = None
        # The task running the Pub/Sub subscribing client
        self._subscriber_task = None
        # Data fetcher
        self._data_fetcher = DataFetcher()
        self._token = token
        self._server_url = pubsub_url
        self._data_sources_config_url = data_sources_config_url
        if self._token is None:
            self._extra_headers = None
        else:
            self._extra_headers = [get_authorization_header(self._token)]
        self._stopping = False
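A minimal usage sketch for the constructor above (hedged: the enclosing class is assumed to be named DataUpdater, its import is omitted, and the start()/stop() coroutines are assumed entry points not shown in the snippet):

import asyncio

async def run_data_updater():
    # Rely on opal_client_config for the token and URLs; only the topics are overridden (topic name is illustrative).
    updater = DataUpdater(data_topics=["policy_data"], fetch_on_connect=True)
    await updater.start()        # assumed coroutine that connects the Pub/Sub client
    try:
        await asyncio.sleep(60)  # keep the subscription alive for a while
    finally:
        await updater.stop()     # assumed coroutine that cancels the subscriber task

asyncio.run(run_data_updater())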
Example #2
    def __init__(
        self,
        token: str = None,
        pubsub_url: str = None,
        subscription_directories: List[str] = None,
        policy_store: BasePolicyStoreClient = None,
    ):
        """inits the policy updater.

        Args:
            token (str, optional): Auth token to include in connections to OPAL server. Defaults to CLIENT_TOKEN.
            pubsub_url (str, optional): URL for Pub/Sub updates for policy. Defaults to OPAL_SERVER_PUBSUB_URL.
            subscription_directories (List[str], optional): Directories in the policy source repo to subscribe to.
                Defaults to POLICY_SUBSCRIPTION_DIRS. Every time a directory is updated by a commit, we will receive
                a message on its respective topic. We dedupe directories with an ancestral relation, and will only
                receive one message for each updated file.
            policy_store (BasePolicyStoreClient, optional): Policy store client to use to store policy code. Defaults to DEFAULT_POLICY_STORE.
        """
        # defaults
        token: str = token or opal_client_config.CLIENT_TOKEN
        pubsub_url: str = pubsub_url or opal_client_config.SERVER_PUBSUB_URL
        subscription_directories: List[str] = subscription_directories or opal_client_config.POLICY_SUBSCRIPTION_DIRS

        # The policy store we'll save policy modules into (i.e. OPA)
        self._policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER()
        # pub/sub server url and authentication data
        self._server_url = pubsub_url
        self._token = token
        if self._token is None:
            self._extra_headers = None
        else:
            self._extra_headers = [get_authorization_header(self._token)]
        # Pub/Sub topics we subscribe to for policy updates
        self._topics = pubsub_topics_from_directories(subscription_directories)
        # The pub/sub client for policy updates
        self._client = None
        # The task running the Pub/Sub subscribing client
        self._subscriber_task = None
        self._stopping = False
        # policy fetcher - fetches policy bundles
        self._policy_fetcher = PolicyFetcher()
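A similar hedged sketch for the policy updater (the PolicyUpdater class name and its start() coroutine are assumptions not shown in the snippet; the directory name is illustrative):

import asyncio

async def run_policy_updater():
    # Subscribe only to the "rbac" directory of the policy repo; everything else falls back to opal_client_config.
    updater = PolicyUpdater(subscription_directories=["rbac"])
    await updater.start()   # assumed coroutine that subscribes to the per-directory topics

asyncio.run(run_policy_updater())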
Example #3
async def update_policy_data(update: DataUpdate = None,
                             policy_store: BasePolicyStoreClient = None,
                             data_fetcher=None):
    """
    Fetches policy data (policy configuration) from the backend and updates it in the policy store (i.e. OPA).
    """
    policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER()
    if data_fetcher is None:
        data_fetcher = DataFetcher()
    # types
    urls: Dict[str, FetcherConfig] = None
    url_to_entry: Dict[str, DataSourceEntry] = None
    # if we have an actual specification for the update
    if update is not None:
        entries: List[DataSourceEntry] = update.entries
        urls = {entry.url: entry.config for entry in entries}
        url_to_entry = {entry.url: entry for entry in entries}
    # get the data for the update
    logger.info("Fetching policy data", urls=urls)
    # Urls may be None - fetch_policy_data has a default for None
    policy_data_by_urls = await data_fetcher.fetch_policy_data(urls)
    # save the data from the update
    for url in policy_data_by_urls:
        # get the path to store the URL data under (the default, None, means "" - i.e. store all the data at the root)
        entry = url_to_entry.get(url, None)
        policy_store_path = "" if entry is None else entry.dst_path
        # None is not valid - use "" (protect from misconfiguration)
        if policy_store_path is None:
            policy_store_path = ""
        # fix the store path (if not empty, it must start with "/" to be nested under data)
        if policy_store_path != "" and not policy_store_path.startswith("/"):
            policy_store_path = f"/{policy_store_path}"
        policy_data = policy_data_by_urls[url]
        logger.info(
            "Saving fetched data to policy-store: source url='{url}', destination path='{path}'",
            url=url,
            path=policy_store_path or '/')
        await policy_store.set_policy_data(policy_data, path=policy_store_path)
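A hedged sketch of calling this coroutine with an explicit update specification. Only the url/config/dst_path attributes and the entries field are taken from the snippet; keyword construction of DataUpdate/DataSourceEntry (pydantic-style) and the concrete values are assumptions:

import asyncio

async def refresh_users_data():
    # One entry: fetch data from an example URL and store it under /users in the policy store.
    entry = DataSourceEntry(url="https://example.com/users.json", config=None, dst_path="/users")
    update = DataUpdate(entries=[entry])
    # policy_store and data_fetcher fall back to DEFAULT_POLICY_STORE_GETTER() and DataFetcher()
    await update_policy_data(update=update)

asyncio.run(refresh_users_data())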
Example #4
    async def update_policy_data(self, update: DataUpdate = None, policy_store: BasePolicyStoreClient = None, data_fetcher=None):
        """
        Fetches policy data (policy configuration) from the backend and updates it in the policy store (i.e. OPA).
        """
        policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER()
        if data_fetcher is None:
            data_fetcher = DataFetcher()
        # types / defaults
        urls: List[Tuple[str, FetcherConfig]] = None
        entries: List[DataSourceEntry] = []
        # track the result of each url in order to report back
        reports: List[DataEntryReport] = []
        # if we have an actual specification for the update
        if update is not None:
            entries = update.entries
            urls = [(entry.url, entry.config) for entry in entries]

        # get the data for the update
        logger.info("Fetching policy data", urls=urls)
        # Urls may be None - handle_urls has a default for None
        policy_data_with_urls = await data_fetcher.handle_urls(urls)
        # Save the data from the update
        # We wrap our interaction with the policy store in a transaction
        async with policy_store.transaction_context(update.id) as store_transaction:
            # for IntelliSense, treat store_transaction as a BasePolicyStoreClient (which it proxies)
            store_transaction: BasePolicyStoreClient
            for (url, fetch_config, result), entry in itertools.zip_longest(policy_data_with_urls, entries):
                if not isinstance(result, Exception):
                    # get the path to store the URL data under (the default, None, means "" - i.e. store all the data at the root)
                    policy_store_path = "" if entry is None else entry.dst_path
                    # None is not valid - use "" (protect from misconfiguration)
                    if policy_store_path is None:
                        policy_store_path = ""
                    # fix the store path (if not empty, it must start with "/" to be nested under data)
                    if policy_store_path != "" and not policy_store_path.startswith("/"):
                        policy_store_path = f"/{policy_store_path}"
                    policy_data = result
                    # Create a report on the data-fetching
                    report = DataEntryReport(entry=entry, hash=self.calc_hash(policy_data), fetched=True)
                    logger.info(
                        "Saving fetched data to policy-store: source url='{url}', destination path='{path}'",
                        url=url,
                        path=policy_store_path or '/'
                    )
                    try:
                        await store_transaction.set_policy_data(policy_data, path=policy_store_path)
                        # No exception - we were able to save to the policy-store
                        report.saved = True
                        # save the report for the entry
                        reports.append(report)
                    except Exception:
                        logger.exception("Failed to save data update to policy-store")
                        # we failed to save to policy-store
                        report.saved = False
                        # save the report for the entry
                        reports.append(report)
                        # re-raise so the context manager will be aware of the failure
                        raise
                else:
                    report = DataEntryReport(entry=entry, fetched=False, saved=False)
                    # save the report for the entry
                    reports.append(report)
        # should we send a report to the configured callbacks?
        if self._should_send_reports:
            # spin off reporting (no need to wait on it)
            asyncio.create_task(self.report_update_results(update, reports, data_fetcher))
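Compared to Example #3, this variant pairs fetch results with their entries via itertools.zip_longest, performs all writes inside a single policy_store.transaction_context, and fans out per-entry DataEntryReports asynchronously once the transaction closes. It is presumably registered as the handler for incoming data-update messages; a hedged sketch of such a callback (the updater instance, the raw-dict payload, and parsing via a pydantic-style parse_obj are assumptions):

async def on_data_update_message(data: dict):
    # Parse the raw Pub/Sub payload into a DataUpdate model (parse_obj is an assumed pydantic method),
    # then apply it inside a single policy-store transaction.
    update = DataUpdate.parse_obj(data)
    await updater.update_policy_data(update=update)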