def create_tables(self):
    """Ensure the configured table exists in the configured dataset,
    creating it via create_bq_table() if it is missing."""
    client = Client(project='investing-management')
    dataset_path = ".".join([client.project, self.dataset])
    existing_ids = {item.table_id for item in client.list_tables(dataset_path)}
    if self.table not in existing_ids:
        create_bq_table(table_name=self.table, dataset_name=self.dataset)
def copy_datasets(client: bigquery.Client, input_dataset, output_dataset):
    """
    Copies tables from source dataset to a destination datasets

    :param client: an instantiated bigquery client object
    :param input_dataset: name of the input dataset
    :param output_dataset: name of the output dataset
    :return:
    """
    # Mirror every table of the input dataset into the output dataset,
    # keeping the table id unchanged.
    for src_table in client.list_tables(input_dataset):
        destination = f'{output_dataset}.{src_table.table_id}'
        client.copy_table(src_table, destination)
def list_tables(
        client: bigquery.Client, dataset: bigquery.DatasetReference
) -> typing.Iterator[bigquery.table.TableListItem]:
    """
    List all tables in a dataset

    NOTE: Ensures all results are retrieved by first getting total table
    count and setting max_results in the list_tables API call.

    :param client: active bigquery client object
    :param dataset: the dataset containing the tables
    :return: tables contained within the requested dataset
    """
    # Pad the known count so tables added between the two calls still fit.
    max_results = get_table_count(client, dataset) + _MAX_RESULTS_PADDING
    return client.list_tables(dataset=dataset, max_results=max_results)
def list_tables(client: bigquery.Client, dataset_id: str):
    """
    Lists the tables in project:dataset

    Args:
        client: BQ API client
        dataset_id: dataset to be inspected

    Returns:
        list

    Examples:
        list_tables(client, 'my_dataset')
    """
    dataset_ref = client.dataset(dataset_id)
    names = []
    for item in client.list_tables(dataset_ref):
        names.append(item.table_id)
    return names
def get_tables_matching_patterns(client: bigquery.Client, patterns: List[str]) -> List[str]:
    """Get BigQuery tables matching the provided patterns.

    Each pattern has the form ``[project:][dataset][.table]``; any component
    may contain fnmatch-style wildcards. A missing project defaults to the
    client's project; missing dataset/table components default to ``"*"``.

    :param client: active bigquery client object
    :param patterns: pattern strings to expand
    :return: fully-qualified ``project.dataset.table`` names for every match
    """
    # Lazily-populated caches so each listing API is hit at most once per
    # project / dataset, even when many patterns are expanded.
    all_projects = None
    all_datasets = {}
    all_tables = {}
    matching_tables = []
    for pattern in patterns:
        project, _, dataset_table = pattern.partition(":")
        dataset, _, table = dataset_table.partition(".")
        projects = [project or client.project]
        dataset = dataset or "*"
        table = table or "*"
        if _uses_wildcards(project):
            if all_projects is None:
                all_projects = [p.project_id for p in client.list_projects()]
            # BUGFIX: fnmatchcase(name, pattern) takes the candidate name
            # first and the glob second. The original call was
            # fnmatchcase(project, p) — arguments reversed — so wildcard
            # project patterns matched nothing. The dataset/table branches
            # below already use the correct order.
            projects = [p for p in all_projects if fnmatchcase(p, project)]
        for project in projects:
            datasets = [dataset]
            if _uses_wildcards(dataset):
                if project not in all_datasets:
                    all_datasets[project] = [
                        d.dataset_id for d in client.list_datasets(project)
                    ]
                datasets = [
                    d for d in all_datasets[project] if fnmatchcase(d, dataset)
                ]
            for dataset in datasets:
                dataset = f"{project}.{dataset}"
                tables = [f"{dataset}.{table}"]
                if _uses_wildcards(table):
                    if dataset not in all_tables:
                        all_tables[dataset] = list(client.list_tables(dataset))
                    tables = [
                        f"{dataset}.{t.table_id}"
                        for t in all_tables[dataset]
                        if fnmatchcase(t.table_id, table)
                    ]
                matching_tables += tables
    return matching_tables
def delete_table(client: bigquery.Client, dataset_id: str, table_id: str):
    """
    Deletes the specified table in the given project:dataset

    Args:
        client: BQ API client
        dataset_id: destination dataset
        table_id: table to be deleted

    Returns:

    Examples:
        delete_table(client, 'my_dataset', 'my_table')
    """
    dataset_ref = client.dataset(dataset_id=dataset_id)
    existing = {item.table_id for item in client.list_tables(dataset_ref)}
    if table_id in existing:
        client.delete_table(dataset_ref.table(table_id))
    else:
        print("THIS TABLE DOES NOT EXIST IN {}:{}".format(client.project, dataset_id))
def get_tables(project_id: str, client: Client, dataset_id: Optional[str] = None) -> Iterator[Table]:
    """
    Gets BigQuery tables from a Google Cloud project.

    Args:
        project_id (str): ID of the project.
        dataset_id (Optional[str]): The ID of the dataset. If `None`, will
            retrieve tables from all datasets in project.
        client (Client): A Google Cloud Client instance.

    Yields:
        Table: A BigQuery table.
    """
    # Either the single requested dataset, or every dataset in the project.
    if dataset_id:
        dataset_refs = [f"{project_id}.{dataset_id}"]
    else:
        dataset_refs = (d.reference for d in client.list_datasets(project=project_id))
    for ref in dataset_refs:
        dataset = client.get_dataset(ref)
        for table_item in client.list_tables(dataset):
            # list_tables yields lightweight items; fetch the full Table.
            yield client.get_table(table_item)
def create_table(client: bigquery.Client, dataset_id: str, table_id: str, schema: list):
    """
    Creates a table according to the given schema in the specified project:dataset

    Args:
        client: BQ API client
        dataset_id: destination dataset
        table_id: table to be created
        schema: schema of the table to be created

    Returns:

    Examples:
        create_table(client, 'my_dataset', 'my_table', my_schema)
    """
    dataset_ref = client.dataset(dataset_id=dataset_id)
    existing = {item.table_id for item in client.list_tables(dataset_ref)}
    if table_id in existing:
        print("THIS TABLE ALREADY EXISTS IN {}:{}".format(client.project, dataset_id))
    else:
        table_ref = dataset_ref.table(table_id)
        client.create_table(bigquery.Table(table_ref, schema))
def create_bq_table(table_name='CRY', dataset_name='price_data'):
    '''Create table if not exists'''
    client = Client()
    dataset_path = client.project + "." + dataset_name
    existing = [item.table_id for item in client.list_tables(dataset_path)]
    if table_name in existing:
        print("Table already exists")
        return
    # Column layouts: 'CRY' gets the crypto bar schema; anything else gets
    # the equity bar schema.
    if table_name == 'CRY':
        columns = [
            ("open", "FLOAT64"),
            ("high", "FLOAT64"),
            ("low", "FLOAT64"),
            ("close", "FLOAT64"),
            ("volume", "FLOAT64"),
            ("market_cap", "FLOAT64"),
            ("symbol", "STRING"),
            ("date", "TIMESTAMP"),
        ]
    else:
        columns = [
            ("open", "FLOAT64"),
            ("high", "FLOAT64"),
            ("low", "FLOAT64"),
            ("close", "FLOAT64"),
            ("adjusted_close", "FLOAT64"),
            ("volume", "FLOAT64"),
            ("dividend_amount", "FLOAT64"),
            ("split_coefficient", "FLOAT64"),
            ("symbol", "STRING"),
            ("date", "TIMESTAMP"),
        ]
    schema = [SchemaField(name, kind, mode="NULLABLE") for name, kind in columns]
    full_table_id = client.project + "." + dataset_name + "." + table_name
    client.create_table(Table(full_table_id, schema=schema))
def _get_existing_table_names(client: bigquery.Client, dataset: str):
    """Return the ids of all tables currently present in *dataset*."""
    names = []
    for item in client.list_tables(dataset=dataset):
        names.append(item.table_id)
    return names
def get_bq_view_names(client: bigquery.Client, dataset: str):
    """Return the ids of items in *dataset* whose table_type is VIEW."""
    views = []
    for item in client.list_tables(dataset=dataset):
        if item.table_type == "VIEW":
            views.append(item.table_id)
    return views
class BigQuery(BaseDb):
    """
    A Google BigQuery database client

    Kwargs:
        name : str - The canonical name to use for this instance
        creds_file : str - The filepath of the desired
            GOOGLE_APPLICATION_CREDENTIALS file
        conn_kwargs : Use in place of a query string to set individual
            attributes of the connection defaults (project, etc)
    """

    def __init__(self, name=None, creds_file=None, **conn_kwargs):
        if creds_file is None:
            creds_file = os.getenv('BIGQUERY_CREDS_FILE', None)
        self._bq_creds_file = creds_file
        self._conn_kwargs = dict(**BIGQUERY_DEFAULT_CONN_KWARGS)
        self._name = name
        # Only keys already present in the defaults are honored; unknown
        # kwargs are silently ignored. (Modernized: six.iteritems ->
        # dict.items(); the file already uses f-strings elsewhere, so it is
        # Python-3-only and six is unnecessary here.)
        for k, v in conn_kwargs.items():
            if k in self._conn_kwargs:
                self._conn_kwargs[k] = v

    def __repr__(self):
        return '<{db.__class__.__name__}({project})>'.format(
            db=self, project=self._conn_kwargs['project'])

    @property
    def name(self):
        return self._name

    @property
    def project(self):
        return self._conn_kwargs['project']

    @project.setter
    def project(self, value):
        self._conn_kwargs['project'] = value

    def _connect(self):
        # NOTE: setdefault means an already-exported
        # GOOGLE_APPLICATION_CREDENTIALS takes precedence over creds_file.
        if self._bq_creds_file is not None:
            if Path(self._bq_creds_file).exists():
                os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS',
                                      self._bq_creds_file)
            else:
                _log.warning('Path set by creds file does not exist: %s',
                             self._bq_creds_file)
        self._conn = Client(**self._conn_kwargs)

    def _close(self):
        """
        This is a no-op because the bigquery Client doesn't have a close
        method. The BaseDb close method will handle setting self._conn to
        None and self._connected to False.
        """
        return

    def _query(self, query_string):
        # connect() is provided by BaseDb and lazily establishes self._conn.
        self.connect()
        query_job = self._conn.query(query_string)
        return query_job.result()

    def query(self, query_string):
        """Run *query_string* and wrap the rows in a QueryResult."""
        # Local import avoids a circular import at module load time.
        from .result import QueryResult
        result = self._query(query_string)
        return QueryResult(result)

    def execute(self, query_string):
        """Run *query_string* for its side effects, discarding the result."""
        self._query(query_string)

    def list_tables(self, dataset_id):
        """
        List all tables in the provided dataset

        Args:
            dataset_id : str - The dataset to query

        Returns:
            list of table names
        """
        self.connect()
        dataset_ref = self._conn.dataset(dataset_id)
        return [t.table_id for t in self._conn.list_tables(dataset_ref)]

    def delete_table(self, dataset_id, table_id):
        """
        Delete the given table in the given dataset

        Args:
            dataset_id : str - The dataset containing the table to delete
            table_id : str - The table to delete

        Returns:
            None
        """
        self.connect()
        table_ref = self._conn.dataset(dataset_id).table(table_id)
        self._conn.delete_table(table_ref)
def get_table_list(client: bq.Client, dataset_name: str) -> List[dict]:
    """Return every table in *dataset_name* as a concrete list.

    :param client: active BigQuery client
    :param dataset_name: dataset whose tables should be listed
    :return: all items yielded by ``client.list_tables``.
        NOTE(review): the items are TableListItem objects, not dicts — the
        ``List[dict]`` annotation is inherited from the original signature;
        confirm with callers before tightening it.
    """
    # list(...) materializes the iterator directly instead of the original
    # manual append loop (ruff PERF402).
    return list(client.list_tables(dataset=dataset_name))