def test_get_sub_dag_multiple_input_views2(self) -> None:
    """Checks descendant and ancestor sub-DAGs when the views at indexes 2 and
    4 of the X-shaped fixture are supplied together as inputs.
    """
    views = self.x_shaped_dag_views_list
    walker = BigQueryViewDagWalker(views)
    selected = [views[2], views[4]]

    # Descendants: both inputs plus the view at index 3.
    descendants = walker.get_descendants_sub_dag(selected)
    self.assertCountEqual([views[2], views[3], views[4]], descendants.views)

    # Ancestors: both inputs plus the views at indexes 0 and 1.
    ancestors = walker.get_ancestors_sub_dag(selected)
    self.assertCountEqual(
        [views[0], views[1], views[2], views[4]],
        ancestors.views,
    )
def test_sub_dag_with_cycle(self) -> None:
    """Checks descendant and ancestor sub-DAGs when the views at indexes 1 and
    4 of the diamond-shaped fixture are supplied together as inputs.
    """
    views = self.diamond_shaped_dag_views_list
    walker = BigQueryViewDagWalker(views)
    selected = [views[1], views[4]]

    # Descendants: everything from index 1 through index 5.
    descendants = walker.get_descendants_sub_dag(selected)
    self.assertCountEqual(
        [views[1], views[2], views[3], views[4], views[5]],
        descendants.views,
    )

    # Ancestors: indexes 0, 1, 2 and 4.
    ancestors = walker.get_ancestors_sub_dag(selected)
    self.assertCountEqual(
        [views[0], views[1], views[2], views[4]],
        ancestors.views,
    )
def test_get_sub_dag_leaf_node(self) -> None:
    """Checks the sub-DAGs produced when the only input is the view at index 4
    of the X-shaped fixture.
    """
    views = self.x_shaped_dag_views_list
    walker = BigQueryViewDagWalker(views)
    leaf = views[4]

    # Descendants: nothing beyond the input view itself is expected.
    descendants = walker.get_descendants_sub_dag([leaf])
    self.assertCountEqual([leaf], descendants.views)

    # Ancestors: every fixture view except the other leaf view "table_4",
    # which the input view does not depend on.
    ancestors = walker.get_ancestors_sub_dag([leaf])
    all_but_other_leaf = [
        candidate for candidate in views if candidate.view_id != "table_4"
    ]
    self.assertCountEqual(all_but_other_leaf, ancestors.views)
def test_get_sub_dag_empty_input_views(self) -> None:
    """Checks that an empty input list yields a sub-DAG with no views in both
    the descendants and the ancestors direction.
    """
    walker = BigQueryViewDagWalker(self.x_shaped_dag_views_list)

    # Descendants of nothing.
    descendants = walker.get_descendants_sub_dag([])
    self.assertCountEqual([], descendants.views)

    # Ancestors of nothing.
    ancestors = walker.get_ancestors_sub_dag([])
    self.assertCountEqual([], ancestors.views)
def test_get_sub_dag_single_node_input(self) -> None:
    """Checks sub-DAG extraction on a walker built from only the first view of
    the X-shaped fixture, using that same single view as the input.
    """
    views = self.x_shaped_dag_views_list
    walker = BigQueryViewDagWalker(views[:1])

    # Descendants: the walker holds one view, so only it can come back.
    descendants = walker.get_descendants_sub_dag(views[:1])
    self.assertCountEqual(views[:1], descendants.views)

    # Ancestors: likewise, only the single view is expected.
    ancestors = walker.get_ancestors_sub_dag(views[:1])
    self.assertCountEqual(views[:1], ancestors.views)
def test_get_sub_dag_middle_node(self) -> None:
    """Checks descendant, ancestor and unioned sub-DAGs when the only input is
    the view at index 2 of the X-shaped fixture.
    """
    views = self.x_shaped_dag_views_list
    walker = BigQueryViewDagWalker(views)
    middle = views[2]

    # Descendants: the input plus the views at indexes 3 and 4.
    descendants_sub_dag = walker.get_descendants_sub_dag([middle])
    self.assertCountEqual(
        [middle, views[3], views[4]],
        descendants_sub_dag.views,
    )

    # Ancestors: the views at indexes 0 and 1 plus the input.
    ancestors_sub_dag = walker.get_ancestors_sub_dag([middle])
    self.assertCountEqual(
        [views[0], views[1], middle],
        ancestors_sub_dag.views,
    )

    # Union of both directions: expected to contain every fixture view.
    combined_dag = BigQueryViewDagWalker.union_dags(
        descendants_sub_dag, ancestors_sub_dag
    )
    self.assertCountEqual(views, combined_dag.views)
def rematerialize_views(
    views_to_update: List[BigQueryView],
    all_view_builders: Sequence[BigQueryViewBuilder],
    view_source_table_datasets: Set[str],
    dataset_overrides: Optional[Dict[str, str]] = None,
    skip_missing_views: bool = False,
    bq_region_override: Optional[str] = None,
) -> None:
    """For all views in the provided |views_to_update| list, re-materializes any
    materialized views. This should be called only when we want to refresh the data in
    the materialized view(s), not when we want to update the underlying query of the
    view(s).

    Args:
        views_to_update: List of views to re-materialize
        all_view_builders: Superset of the views_to_update that contains all views that
            either depend on or are dependents of the list of input views.
        view_source_table_datasets: Set of datasets containing tables that can be
            treated as root nodes in the view dependency graph.
        dataset_overrides: A dictionary mapping dataset_ids to the dataset name they
            should be replaced with for the given list of views_to_update.
        skip_missing_views: If True, ignores any input views that do not exist. If
            False, crashes if tries to materialize a view that does not exist.
        bq_region_override: If set, overrides the region (e.g. us-east1) associated with
            all BigQuery operations.

    Raises:
        Exception: Any exception raised while building the DAG, creating datasets or
            materializing views is re-raised unchanged after a failure measurement
            has been recorded.
    """
    set_default_table_expiration_for_new_datasets = bool(dataset_overrides)
    if set_default_table_expiration_for_new_datasets:
        logging.info(
            "Found non-empty dataset overrides. New datasets created in this process will have a "
            "default table expiration of 24 hours."
        )

    try:
        bq_client = BigQueryClientImpl(region_override=bq_region_override)

        all_views_dag_walker = BigQueryViewDagWalker(
            _build_views_to_update(
                view_source_table_datasets=view_source_table_datasets,
                candidate_view_builders=all_view_builders,
                dataset_overrides=dataset_overrides,
            )
        )

        # Make sure every dataset referenced by the full DAG exists before any
        # view is materialized into it.
        dataset_map = get_managed_view_and_materialized_table_addresses_by_dataset(
            all_views_dag_walker
        )
        _create_all_datasets_if_necessary(
            bq_client,
            list(dataset_map.keys()),
            set_default_table_expiration_for_new_datasets,
        )

        # Limit DAG to only ancestor views and the set of views to update
        ancestors_dag_walker = all_views_dag_walker.get_ancestors_sub_dag(
            views_to_update
        )

        def _materialize_view(
            v: BigQueryView, _parent_results: Dict[BigQueryView, None]
        ) -> None:
            """Materializes a single view, skipping it when not applicable."""
            if not v.materialized_address:
                logging.info(
                    "Skipping non-materialized view [%s.%s].", v.dataset_id, v.view_id
                )
                return

            if skip_missing_views and not bq_client.table_exists(
                bq_client.dataset_ref_for_id(dataset_id=v.dataset_id), v.view_id
            ):
                logging.info(
                    "Skipping materialization of view [%s.%s] which does not exist",
                    v.dataset_id,
                    v.view_id,
                )
                return

            bq_client.materialize_view_to_table(v)

        ancestors_dag_walker.process_dag(_materialize_view)
    except Exception:
        # Record the failure, then propagate the original error.
        with monitoring.measurements() as measurements:
            measurements.measure_int_put(m_failed_view_update, 1)
        # A bare `raise` re-raises the active exception with its traceback
        # intact; `raise e from e` would make the exception its own __cause__.
        raise