def test_reuse_default_cluster_if_not_configured(self):
    """A pipeline without Cloud configuration picks up the known default cluster."""
    clusters = self.current_env.clusters
    runner = interactive_runner.InteractiveRunner(
        underlying_runner=FlinkRunner())
    options = PipelineOptions()
    # This pipeline carries no Cloud configuration of its own.
    pipeline = beam.Pipeline(runner=runner, options=options)
    cluster_meta = ClusterMetadata(
        project_id='test-project', region='test-region')
    cluster_meta.master_url = 'test-url'
    cluster_meta.dashboard = 'test-dashboard'
    manager = DataprocClusterManager(cluster_meta)
    # Register the cluster so that a default cluster becomes known.
    clusters.dataproc_cluster_managers[cluster_meta] = manager
    clusters.set_default_cluster(cluster_meta)

    runner.configure_for_flink(pipeline, options)

    # The default cluster's metadata is applied to the pipeline.
    applied_meta = clusters.cluster_metadata(pipeline)
    self.assertIs(applied_meta, clusters.default_cluster_metadata)
    # Both the clusters registry and the cluster manager track the pipeline.
    self.assertIn(pipeline, clusters.pipelines)
    tracked_manager = clusters.pipelines[pipeline]
    self.assertIn(pipeline, tracked_manager.pipelines)
    # The pipeline's Flink options are tuned to target the cluster.
    flink_options = options.view_as(FlinkRunnerOptions)
    self.assertEqual(flink_options.flink_master, applied_meta.master_url)
    self.assertEqual(
        flink_options.flink_version, clusters.DATAPROC_FLINK_VERSION)
def test_create_but_reuse_a_known_cluster(self):
    """create() returns the existing manager when the identifier matches a known cluster."""
    existing_meta = ClusterMetadata(
        project_id='test-project', region='test-region')
    existing_manager = DataprocClusterManager(existing_meta)
    existing_meta.master_url = 'test-url'
    # Make the cluster known to the registry (and the default).
    self.clusters.set_default_cluster(existing_meta)
    self.clusters.dataproc_cluster_managers[existing_meta] = existing_manager
    self.clusters.master_urls[existing_meta.master_url] = existing_meta

    # Identifying the cluster by an equivalent ClusterMetadata reuses the
    # known cluster manager instead of creating a new one.
    equivalent_meta = ClusterMetadata(
        project_id=existing_meta.project_id,
        region=existing_meta.region,
        cluster_name=existing_meta.cluster_name)
    created = self.clusters.create(equivalent_meta)
    self.assertIs(created, existing_manager)

    # Identifying the cluster by an equivalent master_url does the same.
    created = self.clusters.create(existing_meta.master_url)
    self.assertIs(created, existing_manager)