def extract_program_names(release_table, do_batch):
    """Pull the distinct program names out of the given release table.

    Runs the SQL produced by extract_program_names_sql() and collects the
    `cases__project__program__name` column, skipping SQL NULLs and rows
    holding the literal string "None" (which appears in some source data).

    Args:
        release_table: Release table identifier handed to the SQL builder.
        do_batch: Passed through to bq_harness_with_result (batch priority).

    Returns:
        List of program-name strings.
    """
    query = extract_program_names_sql(release_table)
    rows = bq_harness_with_result(query, do_batch)
    names = []
    for record in rows:
        candidate = record.cases__project__program__name
        # Drop both SQL NULL and the literal string "None":
        if candidate not in (None, "None"):
            names.append(candidate)
    return names
def extract_program_names(release_table, do_batch):
    """Collect program names from the given release table.

    Executes the query built by extract_program_names_sql() and returns the
    `program_name` column values, dropping SQL NULLs and the literal string
    "None" seen in some source rows.

    Args:
        release_table: Release table identifier handed to the SQL builder.
        do_batch: Passed through to bq_harness_with_result (batch priority).

    Returns:
        List of program-name strings.
    """
    sql = extract_program_names_sql(release_table)
    result_rows = bq_harness_with_result(sql, do_batch)
    kept = []
    for result_row in result_rows:
        name = result_row.program_name
        if name is not None and name != "None":
            kept.append(name)
    return kept
def extract_program_names(release_table, do_batch):
    """Return program names from the release table, sanitized for BigQuery.

    Executes the query built by extract_program_names_sql() and returns the
    `program_name` column values, dropping SQL NULLs and the literal string
    "None". Names are sanitized so they can be used as BigQuery dataset IDs,
    which only allow letters, digits, and underscores.

    Args:
        release_table: Release table identifier handed to the SQL builder.
        do_batch: Passed through to bq_harness_with_result (batch priority).

    Returns:
        List of sanitized program-name strings.
    """
    sql = extract_program_names_sql(release_table)
    results = bq_harness_with_result(sql, do_batch)
    retval = []
    for row in results:
        pn = row.program_name
        if pn is not None and pn != "None":
            # Replace every character that BigQuery disallows in dataset
            # names (anything but alphanumerics/underscore) with "_".
            # Generalizes the old dot-only replacement that handled
            # BEATAML1.0 (resolving the former FIXME), and still yields
            # identical output for names whose only odd character is ".".
            sanitized = "".join(
                ch if ch.isalnum() or ch == "_" else "_" for ch in pn
            )
            retval.append(sanitized)
    return retval
def create_all_shadow_tables(source_client, shadow_client, source_project, target_project,
                             do_batch, shadow_prefix, skip_datasets, view_friendly_names,
                             do_tables):
    """Mirror the source project's tables (or views) into the target project
    as empty "shadow" objects that preserve metadata.

    Args:
        source_client: BigQuery client bound to the source project.
        shadow_client: BigQuery client used to create objects in the target.
        source_project: Project ID being shadowed.
        target_project: Project ID receiving the shadow tables/views.
        do_batch: Passed through to bq_harness_with_result (batch priority).
        shadow_prefix: Prefix for the private labels that carry the original
            row count and friendly name on the shadow objects.
        skip_datasets: Dataset IDs (e.g. security logs) to ignore outright.
        view_friendly_names: List of single-entry dicts mapping
            "dataset.table" -> friendly-name override for views.
        do_tables: If True, shadow plain tables and skip views; if False,
            shadow views only.

    Returns:
        True on completion.
    """
    for dataset in source_client.list_datasets():
        # Some datasets (security logs) should be ignored outright:
        if dataset.dataset_id in skip_datasets:
            continue
        for tbl in list(source_client.list_tables(dataset.dataset_id)):
            tbl_obj = source_client.get_table(tbl)
            use_row_count = tbl_obj.num_rows
            use_query = None
            #
            # If we have a view, then we need to extract the row count through a query:
            #
            if tbl_obj.view_query is not None:
                if do_tables:
                    continue
                src_tab_id = '{}.{}.{}'.format(source_project, dataset.dataset_id, tbl.table_id)
                sql = 'SELECT COUNT(*) as count FROM `{}`'.format(src_tab_id)
                results = bq_harness_with_result(sql, do_batch)
                for row in results:
                    use_row_count = row.count
                    break
                # Re-point the view SQL at the target project:
                use_query = tbl_obj.view_query.replace(source_project, target_project)

            if do_tables or (use_query is not None):
                table_id = '{}.{}.{}'.format(target_project, dataset.dataset_id, tbl.table_id)
                print(table_id)
                #
                # Make a completely new copy of the source schema. Do we have to?
                # Probably not. Paranoid.
                #
                targ_schema = []
                for sf in tbl_obj.schema:
                    name = sf.name
                    field_type = sf.field_type
                    mode = sf.mode
                    desc = sf.description
                    fields = tuple(sf.fields)  # no "copy constructor"?
                    targ_schema.append(
                        bigquery.SchemaField(name, field_type, mode, desc, fields))
                #
                # Not supposed to submit a schema for a view! But we need to update
                # it later to get the descriptions brought across:
                #
                if use_query is None:
                    targ_table = bigquery.Table(table_id, schema=targ_schema)
                else:
                    targ_table = bigquery.Table(table_id)
                # BUG FIX: the python client property is friendly_name; the old
                # camelCase attribute (friendlyName) was an arbitrary instance
                # attribute the API ignored, so the name was never copied:
                targ_table.friendly_name = tbl_obj.friendly_name
                targ_table.description = tbl_obj.description
                if tbl_obj.labels is not None:
                    targ_table.labels = tbl_obj.labels.copy()
                else:
                    targ_table.labels = {}
                #
                # The way a table turns into a view is by setting the view_query property:
                #
                if use_query is not None:
                    targ_table.view_query = use_query
                #
                # "Number of rows" in a shadow empty table is provided through a private
                # tag label. Same with friendly name:
                #
                num_row_tag = "{}_{}".format(shadow_prefix, "num_rows")
                # BUG FIX: BigQuery label values must be strings; num_rows is an int:
                targ_table.labels[num_row_tag] = str(use_row_count)
                friendly_name_tag = "{}_{}".format(shadow_prefix, "friendly_name")
                friendly_name_key = "{}.{}".format(dataset.dataset_id, tbl.table_id)
                # BUG FIX: use_name previously leaked across loop iterations (stale
                # value from a prior table, or NameError on the very first table with
                # no override). Default to None and only attach the label on a match.
                # NOTE(review): BigQuery label values must also be lowercase
                # [a-z0-9_-]; overrides may need normalizing — confirm upstream.
                use_name = None
                for adict in view_friendly_names:
                    view_id, friendly = next(iter(adict.items()))
                    if view_id == friendly_name_key:
                        use_name = friendly
                        break
                if use_name is not None:
                    targ_table.labels[friendly_name_tag] = use_name
                shadow_table = shadow_client.create_table(targ_table)
                #
                # If we created a view, update the schema after creation:
                #
                if use_query is not None:
                    shadow_table.schema = targ_schema
                    shadow_client.update_table(shadow_table, ["schema"])
    return True