Example #1
    def test_add_source(self, df: DataFlowGraph):
        source = Source(
            name="test-source",
            node_type="test-type",
            target="test-app",
        )
        df.add_streaming_app(K8sApp.factory(get_streaming_app_deployment()))
        df.add_source(source)
        # 5 nodes: the streaming app, its input, output, and error topics,
        # plus the newly added source
        assert len(df.graph.nodes) == 5
        assert df.graph.has_edge("test-source", "test-app")
        # a source on its own does not create a pipeline
        assert len(df.pipelines) == 0
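
The df fixture and the get_streaming_app_deployment helper above come from the project's test utilities, which are not shown here. A minimal sketch of what the fixture might look like, assuming DataFlowGraph only needs a MetricProvider class (matching StreamsExplorer.__init__ below):

import pytest

@pytest.fixture
def df() -> DataFlowGraph:
    # Hypothetical fixture: builds a fresh graph for each test, mirroring
    # DataFlowGraph(metric_provider=...) in StreamsExplorer.__init__ below.
    return DataFlowGraph(metric_provider=MetricProvider)
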
class StreamsExplorer:
    context = settings.k8s.deployment.context
    namespaces = settings.k8s.deployment.namespaces

    def __init__(self, linking_service: LinkingService,
                 metric_provider: Type[MetricProvider]):
        self.applications: Dict[str, K8sApp] = {}
        self.kafka_connectors: List[KafkaConnector] = []
        self.data_flow = DataFlowGraph(metric_provider=metric_provider)
        self.linking_service = linking_service

    def setup(self):
        self.__setup_k8s_environment()

    def update(self):
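        # Full refresh: clear all cached state, then re-read deployments,
        # stateful sets, cron jobs, and Kafka connectors before rebuilding
        # the pipeline graph.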
        self.applications = {}
        self.kafka_connectors = []
        extractor_container.reset()
        self.data_flow.reset()
        self.__retrieve_deployments()
        self.__retrieve_cron_jobs()
        self.__get_connectors()
        self.__create_graph()

    def get_positioned_json_graph(self) -> dict:
        return self.data_flow.get_positioned_graph()

    def get_positioned_pipeline_json_graph(self, pipeline_name: str) -> dict:
        return self.data_flow.get_positioned_pipeline_graph(pipeline_name)

    def get_pipeline_names(self) -> List[str]:
        return list(self.data_flow.pipelines.keys())

    def get_metrics(self) -> List:
        return self.data_flow.get_metrics()

    def get_node_information(self, node_id: str):
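        # Dispatch on the node type; unknown types fall through and
        # implicitly return None.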
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.connector_info +
                get_displayed_information_connector(config),
            )
        if node_type in (NodeTypesEnum.TOPIC, NodeTypesEnum.ERROR_TOPIC):
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.topic_info + [
                    NodeInfoListItem(
                        name="Schema",
                        value=SchemaRegistry.get_newest_topic_value_schema(
                            node_id),
                        type=NodeInfoType.JSON,
                    )
                ],
            )
        if node_type == NodeTypesEnum.STREAMING_APP:
            info = get_displayed_information_deployment(
                self.applications[node_id])
            return NodeInformation(
                node_id=node_id,
                node_type=node_type,
                info=self.linking_service.streaming_app_info + info,
            )

        if node_type in self.linking_service.sink_source_info:
            return NodeInformation(
                node_id=node_id,
                node_type=NodeTypesEnum.SINK_SOURCE,
                info=self.linking_service.sink_source_info[node_type],
            )

    def get_link(self, node_id: str, link_type: Optional[str]):
        node_type = self.data_flow.get_node_type(node_id)
        if node_type == NodeTypesEnum.CONNECTOR:
            config = KafkaConnect.get_connector_config(node_id)
            return self.linking_service.get_redirect_connector(
                config, link_type)
        if node_type in (NodeTypesEnum.TOPIC, NodeTypesEnum.ERROR_TOPIC):
            return self.linking_service.get_redirect_topic(node_id, link_type)
        if node_type == NodeTypesEnum.STREAMING_APP:
            return self.linking_service.get_redirect_streaming_app(
                self.applications[node_id], link_type)

        if node_type in self.linking_service.sink_source_redirects:
            return self.linking_service.get_sink_source_redirects(
                node_type, node_id)

    def __setup_k8s_environment(self):
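        # In-cluster: use the pod's service account; otherwise load the
        # local kubeconfig with the configured context.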
        try:
            if settings.k8s.deployment.cluster:
                logger.info("Setup K8s environment in cluster")
                kubernetes.config.load_incluster_config()
            else:
                logger.info("Setup K8s environment")
                kubernetes.config.load_kube_config(context=self.context)
        except kubernetes.config.ConfigException as e:
            raise Exception(
                "Could not load K8s environment configuration") from e

        self.k8s_app_client = kubernetes.client.AppsV1Api()
        self.k8s_batch_client = kubernetes.client.BatchV1beta1Api()

    def __retrieve_deployments(self):
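        # Collect deployments and stateful sets; anything that is not a
        # streams-bootstrap app is skipped, and failures are only logged.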
        items = self.get_deployments() + self.get_stateful_sets()
        for item in items:
            try:
                app = K8sApp.factory(item)
                if app.is_streams_bootstrap_app():
                    self.applications[app.name] = app
            except Exception as e:
                logger.debug(e)

    def get_deployments(self) -> List[V1Deployment]:
        deployments: List[V1Deployment] = []
        for namespace in self.namespaces:
            logger.info(f"List deployments in namespace {namespace}")
            deployments += self.k8s_app_client.list_namespaced_deployment(
                namespace=namespace, watch=False).items
        return deployments

    def get_stateful_sets(self) -> List[V1StatefulSet]:
        stateful_sets: List[V1StatefulSet] = []
        for namespace in self.namespaces:
            logger.info(f"List statefulsets in namespace {namespace}")
            stateful_sets += self.k8s_app_client.list_namespaced_stateful_set(
                namespace=namespace, watch=False).items
        return stateful_sets

    def __retrieve_cron_jobs(self):
        logger.info("Retrieve cronjob descriptions")
        cron_jobs = self.get_cron_jobs()
        for cron_job in cron_jobs:
            app: Optional[K8sApp] = extractor_container.on_cron_job(cron_job)
            if app:
                self.applications[app.name] = app

    def get_cron_jobs(self) -> List[V1beta1CronJob]:
        cron_jobs: List[V1beta1CronJob] = []
        for namespace in self.namespaces:
            logger.info(f"List cronjobs in namespace {namespace}")
            cron_jobs += self.k8s_batch_client.list_namespaced_cron_job(
                namespace=namespace, watch=False).items
        return cron_jobs

    def __get_connectors(self):
        logger.info("Retrieve Kafka connectors")
        self.kafka_connectors = KafkaConnect.connectors()

    def __create_graph(self):
        logger.info("Setup pipeline graph")
        for app in self.applications.values():
            self.data_flow.add_streaming_app(app)

        for connector in self.kafka_connectors:
            self.data_flow.add_connector(connector)

        sources, sinks = extractor_container.get_sources_sinks()
        for source in sources:
            self.data_flow.add_source(source)

        for sink in sinks:
            self.data_flow.add_sink(sink)

        # extract subgraphs
        logger.info("Extract independent pipelines")
        self.data_flow.extract_independent_pipelines()
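
For context, here is a rough sketch of how StreamsExplorer might be wired up and queried. The concrete LinkingService and MetricProvider implementations named below are assumptions for illustration, not part of the code above:

# Hypothetical wiring; a real application would presumably do this at startup.
explorer = StreamsExplorer(
    linking_service=DefaultLinkingService(),   # assumed implementation
    metric_provider=PrometheusMetricProvider,  # assumed implementation
)
explorer.setup()   # load the K8s config and create the API clients
explorer.update()  # scan deployments, cron jobs, and Kafka connectors
graph = explorer.get_positioned_json_graph()
pipeline_names = explorer.get_pipeline_names()
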
    def test_multiple_pipelines_sink_source(self, df: DataFlowGraph):
        df.add_streaming_app(
            K8sApp.factory(
                get_streaming_app_deployment(
                    name="test-app1",
                    input_topics="input-topic1",
                    error_topic="error-topic1",
                    output_topic="output-topic1",
                    pipeline="pipeline1",
                )
            )
        )
        df.add_streaming_app(
            K8sApp.factory(
                get_streaming_app_deployment(
                    name="test-app2",
                    input_topics="input-topic2",
                    error_topic="error-topic2",
                    output_topic="output-topic2",
                    pipeline="pipeline2",
                )
            )
        )
        assert len(df.pipelines) == 2
        assert "pipeline1" in df.pipelines
        assert "pipeline2" in df.pipelines
        pipeline1 = df.pipelines["pipeline1"]
        pipeline2 = df.pipelines["pipeline2"]
        assert set(pipeline1.nodes) == {
            "test-app1",
            "input-topic1",
            "output-topic1",
            "error-topic1",
        }
        assert set(pipeline2.nodes) == {
            "test-app2",
            "input-topic2",
            "output-topic2",
            "error-topic2",
        }

        sink_connector = KafkaConnector(
            name="test-sink-connector",
            type=KafkaConnectorTypesEnum.SINK,
            topics=["output-topic1", "output-topic2"],
            config={},
        )
        df.add_connector(sink_connector)
        assert "test-sink-connector" in df.graph.nodes
        assert "test-sink-connector" in pipeline1.nodes
        assert "test-sink-connector" in pipeline2.nodes

        df.add_sink(Sink("test-sink", "test-sink-connector"))
        assert "test-sink" in df.graph.nodes
        assert "test-sink" in pipeline1.nodes
        assert "test-sink" in pipeline2.nodes

        source_connector = KafkaConnector(
            name="test-source-connector",
            type=KafkaConnectorTypesEnum.SOURCE,
            topics=["input-topic1", "input-topic2"],
            config={},
        )
        df.add_connector(source_connector)
        assert "test-source-connector" in df.graph.nodes
        assert "test-source-connector" in pipeline1.nodes
        assert "test-source-connector" in pipeline2.nodes

        df.add_source(Source("test-source", "test-source-connector"))
        assert "test-source" in df.graph.nodes
        assert "test-source" in pipeline1.nodes
        assert "test-source" in pipeline2.nodes

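        # Connectors attached to the "wrong side" of a pipeline's topics
        # (a sink reading an input topic, a source feeding an output topic)
        # show up in the overall graph but are not assigned to either
        # pipeline, as the assertions below verify.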
        unrelated_sink_connector = KafkaConnector(
            name="unrelated-sink-connector",
            type=KafkaConnectorTypesEnum.SINK,
            topics=["input-topic1"],
            config={},
        )
        df.add_connector(unrelated_sink_connector)
        assert "unrelated-sink-connector" in df.graph.nodes
        assert "unrelated-sink-connector" not in pipeline1.nodes
        assert "unrelated-sink-connector" not in pipeline2.nodes

        unrelated_source_connector = KafkaConnector(
            name="unrelated-source-connector",
            type=KafkaConnectorTypesEnum.SOURCE,
            topics=["output-topic1"],
            config={},
        )
        df.add_connector(unrelated_source_connector)
        assert "unrelated-source-connector" in df.graph.nodes
        assert "unrelated-source-connector" not in pipeline1.nodes
        assert "unrelated-source-connector" not in pipeline2.nodes