def test_add_streaming_app(self, df: DataFlowGraph):
    df.add_streaming_app(self.get_k8s_app())

    assert len(df.graph.nodes) == 4
    assert df.graph.has_edge("input-topic", "test-app")
    assert df.graph.has_edge("test-app", "output-topic")
    assert df.graph.has_edge("test-app", "error-topic")

    # should support multiple input topics
    df.reset()
    df.add_streaming_app(
        self.get_k8s_app(input_topics="input-topic1,input-topic2")
    )

    assert len(df.graph.nodes) == 5
    assert df.graph.has_edge("input-topic1", "test-app")
    assert df.graph.has_edge("input-topic2", "test-app")
    assert df.graph.has_edge("test-app", "output-topic")
    assert df.graph.has_edge("test-app", "error-topic")

    df.reset()
    df.add_streaming_app(
        self.get_k8s_app(multiple_outputs="1=extra-output1,2=extra-output2")
    )

    assert len(df.graph.nodes) == 6
    assert df.graph.has_edge("input-topic", "test-app")
    assert df.graph.has_edge("test-app", "output-topic")
    assert df.graph.has_edge("test-app", "error-topic")
    assert df.graph.has_edge("test-app", "extra-output1")
    assert df.graph.has_edge("test-app", "extra-output2")
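# A minimal sketch of the behavior the test above exercises, assuming a
# networkx-backed DataFlowGraph. This is NOT the project's implementation;
# the attribute names on `app` (name, input_topics, output_topic,
# error_topic, extra_output_topics) are assumptions for illustration.
import networkx as nx


class DataFlowGraphSketch:
    def __init__(self) -> None:
        self.graph = nx.DiGraph()

    def reset(self) -> None:
        self.graph.clear()

    def add_streaming_app(self, app) -> None:
        self.graph.add_node(app.name)
        # add_edge creates the topic nodes implicitly
        for topic in app.input_topics:
            self.graph.add_edge(topic, app.name)
        self.graph.add_edge(app.name, app.output_topic)
        if app.error_topic:
            self.graph.add_edge(app.name, app.error_topic)
        for topic in app.extra_output_topics:
            self.graph.add_edge(app.name, topic)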
class StreamsExplorer:
    context = settings.k8s.deployment.context
    namespaces = settings.k8s.deployment.namespaces

    def __init__(
        self, linking_service: LinkingService, metric_provider: Type[MetricProvider]
    ):
        self.applications: Dict[str, K8sApp] = {}
        self.kafka_connectors: List[KafkaConnector] = []
        self.data_flow = DataFlowGraph(metric_provider=metric_provider)
        self.linking_service = linking_service
        self.kafka = Kafka()

    def setup(self):
        self.__setup_k8s_environment()

    async def update(self):
        self.applications = {}
        self.kafka_connectors = []
        extractor_container.reset()
        self.data_flow.reset()
        self.__retrieve_deployments()
        self.__retrieve_cron_jobs()
        self.__get_connectors()
        self.__create_graph()
        self.data_flow.setup_metric_provider()
        await self.data_flow.store_json_graph()

    def get_positioned_json_graph(self) -> dict:
        return self.data_flow.json_graph

    async def get_positioned_pipeline_json_graph(
        self, pipeline_name: str
    ) -> Optional[dict]:
        return await self.data_flow.get_positioned_pipeline_graph(pipeline_name)

    def get_pipeline_names(self) -> List[str]:
        return list(self.data_flow.pipelines.keys())

    async def get_metrics(self) -> List[Metric]:
        return await self.data_flow.get_metrics()

    @ttl_cache(ttl=settings.node_info.cache_ttl)
    def get_node_information(self, node_id: str):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.connector_info
                + get_displayed_information_connector(config),
            )
        elif node_type == NodeTypesEnum.TOPIC or node_type == NodeTypesEnum.ERROR_TOPIC:
            # copy to avoid mutating the linking service's shared list on every call
            info = list(self.linking_service.topic_info)
            if self.kafka.enabled:
                partitions = self.kafka.get_topic_partitions(node_id)
                if partitions is not None:
                    info.append(
                        NodeInfoListItem(
                            name="Partitions",
                            value=len(partitions),
                            type=NodeInfoType.BASIC,
                        )
                    )
                config = self.kafka.get_topic_config(node_id)
                info += get_displayed_information_topic(config)
            info.append(
                NodeInfoListItem(
                    name="Schema",
                    value={},
                    type=NodeInfoType.JSON,
                )
            )
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=info,
            )
        elif node_type == NodeTypesEnum.STREAMING_APP:
            info = get_displayed_information_deployment(self.applications[node_id])
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.streaming_app_info + info,
            )
        elif node_type in self.linking_service.sink_source_info:
            return NodeInformation(
                node_id=node_id,
                node_type=NodeTypesEnum.SINK_SOURCE,
                info=self.linking_service.sink_source_info[node_type],
            )

    def get_link(self, node_id: str, link_type: Optional[str]):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return self.linking_service.get_redirect_connector(config, link_type)
        if node_type == NodeTypesEnum.TOPIC or node_type == NodeTypesEnum.ERROR_TOPIC:
            return self.linking_service.get_redirect_topic(node_id, link_type)
        if node_type == NodeTypesEnum.STREAMING_APP:
            return self.linking_service.get_redirect_streaming_app(
                self.applications[node_id], link_type
            )
        if node_type in self.linking_service.sink_source_redirects:
            return self.linking_service.get_sink_source_redirects(node_type, node_id)

    def __setup_k8s_environment(self):
        try:
            if settings.k8s.deployment.cluster:
                logger.info("Setup K8s environment in cluster")
                kubernetes.config.load_incluster_config()
            else:
                logger.info("Setup K8s environment")
                kubernetes.config.load_kube_config(context=self.context)
        except kubernetes.config.ConfigException:
            raise Exception("Could not load K8s environment configuration")
        self.k8s_app_client = kubernetes.client.AppsV1Api()
        self.k8s_batch_client = kubernetes.client.BatchV1beta1Api()

    def __retrieve_deployments(self):
        items: List[K8sObject] = []
        items += self.get_deployments()
        items += self.get_stateful_sets()
        for item in items:
            try:
                app = K8sApp.factory(item)
                self.__add_app(app)
            except Exception as e:
                logger.debug(e)

    def get_deployments(self) -> List[V1Deployment]:
        deployments: List[V1Deployment] = []
        for namespace in self.namespaces:
            logger.info(f"List deployments in namespace {namespace}")
            deployments += self.k8s_app_client.list_namespaced_deployment(
                namespace=namespace, watch=False
            ).items
        return deployments

    def get_stateful_sets(self) -> List[V1StatefulSet]:
        stateful_sets: List[V1StatefulSet] = []
        for namespace in self.namespaces:
            logger.info(f"List statefulsets in namespace {namespace}")
            stateful_sets += self.k8s_app_client.list_namespaced_stateful_set(
                namespace=namespace, watch=False
            ).items
        return stateful_sets

    def __retrieve_cron_jobs(self):
        logger.info("Retrieve cronjob descriptions")
        cron_jobs = self.get_cron_jobs()
        for cron_job in cron_jobs:
            if app := extractor_container.on_cron_job(cron_job):
                self.__add_app(app)
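# Hypothetical usage of the async StreamsExplorer above; the concrete
# LinkingService and MetricProvider arguments are assumptions, not part
# of this module.
import asyncio


async def refresh_graph(explorer: StreamsExplorer) -> dict:
    # Rescan K8s deployments, statefulsets, cronjobs and Kafka connectors,
    # rebuild the pipeline graph, then return the positioned JSON graph.
    await explorer.update()
    return explorer.get_positioned_json_graph()


# explorer = StreamsExplorer(my_linking_service, my_metric_provider)  # assumed instances
# explorer.setup()
# graph = asyncio.run(refresh_graph(explorer))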
class StreamsExplorer:
    context = settings.k8s.deployment.context
    namespaces = settings.k8s.deployment.namespaces

    def __init__(
        self, linking_service: LinkingService, metric_provider: Type[MetricProvider]
    ):
        self.applications: Dict[str, K8sApp] = {}
        self.kafka_connectors: List[KafkaConnector] = []
        self.data_flow = DataFlowGraph(metric_provider=metric_provider)
        self.linking_service = linking_service

    def setup(self):
        self.__setup_k8s_environment()

    def update(self):
        self.applications = {}
        self.kafka_connectors = []
        extractor_container.reset()
        self.data_flow.reset()
        self.__retrieve_deployments()
        self.__retrieve_cron_jobs()
        self.__get_connectors()
        self.__create_graph()

    def get_positioned_json_graph(self) -> dict:
        return self.data_flow.get_positioned_graph()

    def get_positioned_pipeline_json_graph(self, pipeline_name) -> dict:
        return self.data_flow.get_positioned_pipeline_graph(pipeline_name)

    def get_pipeline_names(self) -> List[str]:
        return list(self.data_flow.independent_graphs.keys())

    def get_metrics(self) -> List:
        return self.data_flow.get_metrics()

    def get_node_information(self, node_id: str):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.connector_info
                + get_displayed_information_connector(config),
            )
        if node_type == NodeTypesEnum.TOPIC or node_type == NodeTypesEnum.ERROR_TOPIC:
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.topic_info
                + [
                    NodeInfoListItem(
                        name="Schema",
                        value=SchemaRegistry.get_newest_topic_value_schema(node_id),
                        type=NodeInfoType.JSON,
                    )
                ],
            )
        if node_type == NodeTypesEnum.STREAMING_APP:
            info = get_displayed_information_deployment(self.applications[node_id])
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.streaming_app_info + info,
            )
        if node_type in self.linking_service.sink_source_info:
            return NodeInformation(
                node_id=node_id,
                node_type=NodeTypesEnum.SINK_SOURCE,
                info=self.linking_service.sink_source_info[node_type],
            )

    def get_link(self, node_id: str, link_type: Optional[str]):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return self.linking_service.get_redirect_connector(config, link_type)
        if node_type == NodeTypesEnum.TOPIC or node_type == NodeTypesEnum.ERROR_TOPIC:
            return self.linking_service.get_redirect_topic(node_id, link_type)
        if node_type == NodeTypesEnum.STREAMING_APP:
            return self.linking_service.get_redirect_streaming_app(
                self.applications[node_id], link_type
            )
        if node_type in self.linking_service.sink_source_redirects:
            return self.linking_service.get_sink_source_redirects(node_type, node_id)

    def __setup_k8s_environment(self):
        try:
            if settings.k8s.deployment.cluster:
                logger.info("Setup K8s environment in cluster")
                kubernetes.config.load_incluster_config()
            else:
                logger.info("Setup K8s environment")
                kubernetes.config.load_kube_config(context=self.context)
        except kubernetes.config.ConfigException:
            raise Exception("Could not load K8s environment configuration")
        self.k8s_app_client = kubernetes.client.AppsV1Api()
        self.k8s_batch_client = kubernetes.client.BatchV1beta1Api()

    def __retrieve_deployments(self):
        items = self.get_deployments() + self.get_stateful_sets()
        for item in items:
            try:
                app = K8sApp.factory(item)
                if app.is_streams_bootstrap_app():
                    self.applications[app.name] = app
            except Exception as e:
                logger.debug(e)

    def get_deployments(self) -> List[V1Deployment]:
        deployments: List[V1Deployment] = []
        for namespace in self.namespaces:
            logger.info(f"List deployments in namespace {namespace}")
            deployments += self.k8s_app_client.list_namespaced_deployment(
                namespace=namespace, watch=False
            ).items
        return deployments

    def get_stateful_sets(self) -> List[V1StatefulSet]:
        stateful_sets: List[V1StatefulSet] = []
        for namespace in self.namespaces:
            logger.info(f"List statefulsets in namespace {namespace}")
            stateful_sets += self.k8s_app_client.list_namespaced_stateful_set(
                namespace=namespace, watch=False
            ).items
        return stateful_sets

    def __retrieve_cron_jobs(self):
        logger.info("Retrieve cronjob descriptions")
        cron_jobs = self.get_cron_jobs()
        for cron_job in cron_jobs:
            app: Optional[K8sApp] = extractor_container.on_cron_job(cron_job)
            if app:
                self.applications[app.name] = app

    def get_cron_jobs(self) -> List[V1beta1CronJob]:
        cron_jobs: List[V1beta1CronJob] = []
        for namespace in self.namespaces:
            logger.info(f"List cronjobs in namespace {namespace}")
            cron_jobs += self.k8s_batch_client.list_namespaced_cron_job(
                namespace=namespace, watch=False
            ).items
        return cron_jobs

    def __get_connectors(self):
        logger.info("Retrieve Kafka connectors")
        self.kafka_connectors = KafkaConnect.connectors()

    def __create_graph(self):
        logger.info("Setup pipeline graph")
        for _, app in self.applications.items():
            self.data_flow.add_streaming_app(app)
        for connector in self.kafka_connectors:
            self.data_flow.add_connector(connector)
        sources, sinks = extractor_container.get_sources_sinks()
        for source in sources:
            self.data_flow.add_source(source)
        for sink in sinks:
            self.data_flow.add_sink(sink)

        # extract subgraphs
        logger.info("Extract independent pipelines")
        self.data_flow.extract_independent_pipelines()
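# One plausible implementation of extract_independent_pipelines, assuming
# the graph is a networkx DiGraph: each weakly connected component forms
# an independent pipeline. A sketch, not the project's actual code; the
# pipeline naming scheme here is an assumption.
import networkx as nx


def extract_independent_pipelines_sketch(graph: nx.DiGraph) -> dict:
    pipelines = {}
    for component in nx.weakly_connected_components(graph):
        # subgraph() returns a read-only view onto the original graph
        name = sorted(component)[0]
        pipelines[name] = graph.subgraph(component)
    return pipelines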