Esempio n. 1
0
    def test_reuse_default_cluster_if_not_configured(self):
        """Check that an unconfigured pipeline falls back to the default cluster.

        The pipeline options carry no Cloud settings, but a default cluster
        has been registered with the environment's clusters. After
        ``configure_for_flink`` the pipeline must be tracked against that
        default cluster and the Flink options must point at it.
        """
        env_clusters = self.current_env.clusters
        flink_interactive_runner = interactive_runner.InteractiveRunner(
            underlying_runner=FlinkRunner())
        pipeline_options = PipelineOptions()
        # No Cloud-related configuration is supplied for this pipeline.
        pipeline = beam.Pipeline(
            runner=flink_interactive_runner, options=pipeline_options)

        default_meta = ClusterMetadata(
            project_id='test-project', region='test-region')
        default_meta.master_url = 'test-url'
        default_meta.dashboard = 'test-dashboard'
        default_manager = DataprocClusterManager(default_meta)
        # Register the manager and mark its metadata as the default cluster.
        env_clusters.dataproc_cluster_managers[default_meta] = default_manager
        env_clusters.set_default_cluster(default_meta)

        flink_interactive_runner.configure_for_flink(
            pipeline, pipeline_options)

        # The metadata resolved for the pipeline is the default one itself.
        resolved_meta = env_clusters.cluster_metadata(pipeline)
        self.assertIs(resolved_meta, env_clusters.default_cluster_metadata)

        # The pipeline is registered, and the manager it maps to knows it.
        self.assertIn(pipeline, env_clusters.pipelines)
        self.assertIn(pipeline, env_clusters.pipelines[pipeline].pipelines)

        # The Flink options have been tuned to target the cluster.
        tuned_flink_options = pipeline_options.view_as(FlinkRunnerOptions)
        self.assertEqual(
            tuned_flink_options.flink_master, resolved_meta.master_url)
        self.assertEqual(
            tuned_flink_options.flink_version,
            env_clusters.DATAPROC_FLINK_VERSION)
Esempio n. 2
0
    def test_create_but_reuse_a_known_cluster(self):
        """Check that ``create`` returns the manager of an already known cluster.

        A cluster is registered up front together with its master URL. Asking
        to create a cluster identified either by equivalent metadata or by
        that master URL must hand back the very same manager instance.
        """
        clusters = self.clusters

        existing_meta = ClusterMetadata(
            project_id='test-project', region='test-region')
        existing_manager = DataprocClusterManager(existing_meta)
        existing_meta.master_url = 'test-url'
        # Make the cluster known: default, manager lookup and URL lookup.
        clusters.set_default_cluster(existing_meta)
        clusters.dataproc_cluster_managers[existing_meta] = existing_manager
        clusters.master_urls[existing_meta.master_url] = existing_meta

        # First identifier: a separate metadata object with the same
        # project, region and cluster name as the known one.
        equivalent_meta = ClusterMetadata(
            project_id=existing_meta.project_id,
            region=existing_meta.region,
            cluster_name=existing_meta.cluster_name)
        self.assertIs(clusters.create(equivalent_meta), existing_manager)

        # Second identifier: the master URL of the known cluster.
        self.assertIs(
            clusters.create(existing_meta.master_url), existing_manager)