def test_add_source(self, df: DataFlowGraph):
    source = Source(
        name="test-source",
        node_type="test-type",
        target="test-app",
    )
    df.add_streaming_app(K8sApp.factory(get_streaming_app_deployment()))
    df.add_source(source)

    assert len(df.graph.nodes) == 5
    assert df.graph.has_edge("test-source", "test-app")
    assert len(df.pipelines) == 0
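# A minimal sketch of the behaviour test_add_source exercises, assuming
# DataFlowGraph wraps a networkx.DiGraph (names below are illustrative,
# not the project's actual implementation):
import networkx as nx


def add_source_sketch(graph: nx.DiGraph, source) -> None:
    # A source becomes a new node with a directed edge into the app it feeds,
    # which is why the test expects has_edge("test-source", "test-app").
    graph.add_node(source.name, node_type=source.node_type)
    graph.add_edge(source.name, source.target)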
class StreamsExplorer:
    context = settings.k8s.deployment.context
    namespaces = settings.k8s.deployment.namespaces

    def __init__(
        self, linking_service: LinkingService, metric_provider: Type[MetricProvider]
    ):
        self.applications: Dict[str, K8sApp] = {}
        self.kafka_connectors: List[KafkaConnector] = []
        self.data_flow = DataFlowGraph(metric_provider=metric_provider)
        self.linking_service = linking_service

    def setup(self):
        self.__setup_k8s_environment()

    def update(self):
        self.applications = {}
        self.kafka_connectors = []
        extractor_container.reset()
        self.data_flow.reset()
        self.__retrieve_deployments()
        self.__retrieve_cron_jobs()
        self.__get_connectors()
        self.__create_graph()

    def get_positioned_json_graph(self) -> dict:
        return self.data_flow.get_positioned_graph()

    def get_positioned_pipeline_json_graph(self, pipeline_name) -> dict:
        return self.data_flow.get_positioned_pipeline_graph(pipeline_name)

    def get_pipeline_names(self) -> List[str]:
        return list(self.data_flow.independent_graphs.keys())

    def get_metrics(self) -> List:
        return self.data_flow.get_metrics()

    def get_node_information(self, node_id: str):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.connector_info
                + get_displayed_information_connector(config),
            )
        if node_type == NodeTypesEnum.TOPIC or node_type == NodeTypesEnum.ERROR_TOPIC:
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.topic_info
                + [
                    NodeInfoListItem(
                        name="Schema",
                        value=SchemaRegistry.get_newest_topic_value_schema(node_id),
                        type=NodeInfoType.JSON,
                    )
                ],
            )
        if node_type == NodeTypesEnum.STREAMING_APP:
            info = get_displayed_information_deployment(self.applications[node_id])
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.streaming_app_info + info,
            )
        if node_type in self.linking_service.sink_source_info:
            return NodeInformation(
                node_id=node_id,
                node_type=NodeTypesEnum.SINK_SOURCE,
                info=self.linking_service.sink_source_info[node_type],
            )

    def get_link(self, node_id: str, link_type: Optional[str]):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return self.linking_service.get_redirect_connector(config, link_type)
        if node_type == NodeTypesEnum.TOPIC or node_type == NodeTypesEnum.ERROR_TOPIC:
            return self.linking_service.get_redirect_topic(node_id, link_type)
        if node_type == NodeTypesEnum.STREAMING_APP:
            return self.linking_service.get_redirect_streaming_app(
                self.applications[node_id], link_type
            )
        if node_type in self.linking_service.sink_source_redirects:
            return self.linking_service.get_sink_source_redirects(node_type, node_id)

    def __setup_k8s_environment(self):
        try:
            if settings.k8s.deployment.cluster:
                logger.info("Setup K8s environment in cluster")
                kubernetes.config.load_incluster_config()
            else:
                logger.info("Setup K8s environment")
                kubernetes.config.load_kube_config(context=self.context)
        except kubernetes.config.ConfigException:
            raise Exception("Could not load K8s environment configuration")
        self.k8s_app_client = kubernetes.client.AppsV1Api()
        self.k8s_batch_client = kubernetes.client.BatchV1beta1Api()

    def __retrieve_deployments(self):
        items = self.get_deployments() + self.get_stateful_sets()
        for item in items:
            try:
                app = K8sApp.factory(item)
                if app.is_streams_bootstrap_app():
                    self.applications[app.name] = app
            except Exception as e:
                logger.debug(e)

    def get_deployments(self) -> List[V1Deployment]:
        deployments: List[V1Deployment] = []
        for namespace in self.namespaces:
            logger.info(f"List deployments in namespace {namespace}")
            deployments += self.k8s_app_client.list_namespaced_deployment(
                namespace=namespace, watch=False
            ).items
        return deployments

    def get_stateful_sets(self) -> List[V1StatefulSet]:
        stateful_sets: List[V1StatefulSet] = []
        for namespace in self.namespaces:
            logger.info(f"List statefulsets in namespace {namespace}")
            stateful_sets += self.k8s_app_client.list_namespaced_stateful_set(
                namespace=namespace, watch=False
            ).items
        return stateful_sets

    def __retrieve_cron_jobs(self):
        logger.info("Retrieve cronjob descriptions")
        cron_jobs = self.get_cron_jobs()
        for cron_job in cron_jobs:
            app: Optional[K8sApp] = extractor_container.on_cron_job(cron_job)
            if app:
                self.applications[app.name] = app

    def get_cron_jobs(self) -> List[V1beta1CronJob]:
        cron_jobs: List[V1beta1CronJob] = []
        for namespace in self.namespaces:
            logger.info(f"List cronjobs in namespace {namespace}")
            cron_jobs += self.k8s_batch_client.list_namespaced_cron_job(
                namespace=namespace, watch=False
            ).items
        return cron_jobs

    def __get_connectors(self):
        logger.info("Retrieve Kafka connectors")
        self.kafka_connectors = KafkaConnect.connectors()

    def __create_graph(self):
        logger.info("Setup pipeline graph")
        for _, app in self.applications.items():
            self.data_flow.add_streaming_app(app)
        for connector in self.kafka_connectors:
            self.data_flow.add_connector(connector)
        sources, sinks = extractor_container.get_sources_sinks()
        for source in sources:
            self.data_flow.add_source(source)
        for sink in sinks:
            self.data_flow.add_sink(sink)
        # extract subgraphs
        logger.info("Extract independent pipelines")
        self.data_flow.extract_independent_pipelines()
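# A minimal usage sketch for the class above; run_explorer is a hypothetical
# helper, and callers must supply a concrete LinkingService and MetricProvider
# implementation from the project:
def run_explorer(
    linking_service: LinkingService, metric_provider: Type[MetricProvider]
) -> dict:
    explorer = StreamsExplorer(linking_service, metric_provider)
    explorer.setup()   # load kube config and create the K8s API clients
    explorer.update()  # rescan deployments, stateful sets, cron jobs, connectors
    return explorer.get_positioned_json_graph()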
def test_multiple_pipelines_sink_source(self, df: DataFlowGraph):
    df.add_streaming_app(
        K8sApp.factory(
            get_streaming_app_deployment(
                name="test-app1",
                input_topics="input-topic1",
                error_topic="error-topic1",
                output_topic="output-topic1",
                pipeline="pipeline1",
            )
        )
    )
    df.add_streaming_app(
        K8sApp.factory(
            get_streaming_app_deployment(
                name="test-app2",
                input_topics="input-topic2",
                error_topic="error-topic2",
                output_topic="output-topic2",
                pipeline="pipeline2",
            )
        )
    )

    assert len(df.pipelines) == 2
    assert "pipeline1" in df.pipelines
    assert "pipeline2" in df.pipelines
    pipeline1 = df.pipelines["pipeline1"]
    pipeline2 = df.pipelines["pipeline2"]
    assert set(pipeline1.nodes) == {
        "test-app1",
        "input-topic1",
        "output-topic1",
        "error-topic1",
    }
    assert set(pipeline2.nodes) == {
        "test-app2",
        "input-topic2",
        "output-topic2",
        "error-topic2",
    }

    sink_connector = KafkaConnector(
        name="test-sink-connector",
        type=KafkaConnectorTypesEnum.SINK,
        topics=["output-topic1", "output-topic2"],
        config={},
    )
    df.add_connector(sink_connector)
    assert "test-sink-connector" in df.graph.nodes
    assert "test-sink-connector" in pipeline1.nodes
    assert "test-sink-connector" in pipeline2.nodes
    df.add_sink(Sink("test-sink", "test-sink-connector"))
    assert "test-sink" in df.graph.nodes
    assert "test-sink" in pipeline1.nodes
    assert "test-sink" in pipeline2.nodes

    source_connector = KafkaConnector(
        name="test-source-connector",
        type=KafkaConnectorTypesEnum.SOURCE,
        topics=["input-topic1", "input-topic2"],
        config={},
    )
    df.add_connector(source_connector)
    assert "test-source-connector" in df.graph.nodes
    assert "test-source-connector" in pipeline1.nodes
    assert "test-source-connector" in pipeline2.nodes
    df.add_source(Source("test-source", "test-source-connector"))
    assert "test-source" in df.graph.nodes
    assert "test-source" in pipeline1.nodes
    assert "test-source" in pipeline2.nodes

    unrelated_sink_connector = KafkaConnector(
        name="unrelated-sink-connector",
        type=KafkaConnectorTypesEnum.SINK,
        topics=["input-topic1"],
        config={},
    )
    df.add_connector(unrelated_sink_connector)
    assert "unrelated-sink-connector" in df.graph.nodes
    assert "unrelated-sink-connector" not in pipeline1.nodes
    assert "unrelated-sink-connector" not in pipeline2.nodes

    unrelated_source_connector = KafkaConnector(
        name="unrelated-source-connector",
        type=KafkaConnectorTypesEnum.SOURCE,
        topics=["output-topic1"],
        config={},
    )
    df.add_connector(unrelated_source_connector)
    assert "unrelated-source-connector" in df.graph.nodes
    assert "unrelated-source-connector" not in pipeline1.nodes
    assert "unrelated-source-connector" not in pipeline2.nodes
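# The unrelated-connector assertions above imply a membership rule: a pipeline
# holds its labelled app plus everything upstream or downstream of it, so a
# connector reading a pipeline's output topic joins it, while a second,
# unrelated consumer of its input topic does not. A sketch of that rule (an
# illustrative reconstruction, not the project's actual code):
from typing import Set

import networkx as nx


def pipeline_members(graph: nx.DiGraph, app_node: str) -> Set[str]:
    # Ancestors are upstream sources and topics; descendants are downstream
    # topics, connectors, and sinks.
    return {app_node} | nx.ancestors(graph, app_node) | nx.descendants(graph, app_node)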