コード例 #1
0
 def create_tables(self):
     """Create ``self.table`` in ``self.dataset`` if it is not listed yet.

     The tables of ``<project>.<dataset>`` are listed through a client bound
     to the ``investing-management`` project; ``create_bq_table`` is called
     only when ``self.table`` is absent from the listed table ids.
     """
     bq_client = Client(project='investing-management')
     dataset_path = ".".join((bq_client.project, self.dataset))
     known_ids = [
         entry.table_id for entry in bq_client.list_tables(dataset_path)
     ]
     if self.table in known_ids:
         # Already present: nothing to create.
         return
     create_bq_table(table_name=self.table, dataset_name=self.dataset)
コード例 #2
0
ファイル: bq.py プロジェクト: dcarbone/curation
def copy_datasets(client: bigquery.Client, input_dataset, output_dataset):
    """
    Copy every table listed in one dataset into another dataset

    Each copy is submitted through ``client.copy_table``; the value that call
    returns is not inspected here.

    :param client: an instantiated bigquery client object
    :param input_dataset: dataset whose tables are copied
    :param output_dataset: dataset receiving the copies; each destination is
        addressed as ``<output_dataset>.<table_id>``
    :return: None
    """
    for source_table in client.list_tables(input_dataset):
        # The destination keeps the source table id under the output dataset
        destination = '{}.{}'.format(output_dataset, source_table.table_id)
        client.copy_table(source_table, destination)
コード例 #3
0
ファイル: bq.py プロジェクト: dcarbone/curation
def list_tables(
    client: bigquery.Client, dataset: bigquery.DatasetReference
) -> typing.Iterator[bigquery.table.TableListItem]:
    """
    Return the tables of a dataset

    NOTE: The table count is looked up first (``get_table_count``) and,
    increased by ``_MAX_RESULTS_PADDING``, handed to the list tables API call
    as ``max_results`` so that the listing is not cut short

    :param client: active bigquery client object
    :param dataset: the dataset containing the tables
    :return: tables contained within the requested dataset
    """
    result_limit = get_table_count(client, dataset) + _MAX_RESULTS_PADDING
    return client.list_tables(dataset=dataset, max_results=result_limit)
コード例 #4
0
def list_tables(client: bigquery.Client, dataset_id: str):
    """
    Lists the tables in project:dataset

    Args:
        client: BQ API client; its ``project`` is the project the dataset is
            looked up in
        dataset_id: dataset to be inspected

    Returns:
        list with the ``table_id`` of every table listed for the dataset

    Examples:
        list_tables(client, 'my_dataset')
    """
    # Client.dataset() is deprecated; build the equivalent reference directly.
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    return [t.table_id for t in client.list_tables(dataset_ref)]
コード例 #5
0
def get_tables_matching_patterns(client: bigquery.Client,
                                 patterns: List[str]) -> List[str]:
    """Get BigQuery tables matching the provided patterns.

    Each pattern is split as ``project:dataset.table``. An empty project
    falls back to ``client.project``; an empty dataset or table falls back
    to ``"*"``. A component is expanded against the API listing only when
    ``_uses_wildcards`` reports that it contains wildcards; otherwise it is
    used verbatim. Listings are cached for the duration of the call so every
    project/dataset is listed at most once.

    Args:
        client: BigQuery client used to list projects, datasets and tables.
        patterns: Patterns to resolve.

    Returns:
        Fully qualified ``project.dataset.table`` names, in pattern order.
        A table matched by several patterns appears once per match.
    """
    all_projects = None
    all_datasets = {}
    all_tables = {}
    matching_tables = []

    for pattern in patterns:
        # Keep the pattern components under their own names: the loops below
        # must never overwrite them, or later iterations would match against
        # an already-qualified name instead of the pattern.
        project_pattern, _, dataset_table = pattern.partition(":")
        dataset_pattern, _, table_pattern = dataset_table.partition(".")
        dataset_pattern = dataset_pattern or "*"
        table_pattern = table_pattern or "*"

        projects = [project_pattern or client.project]
        if _uses_wildcards(project_pattern):
            if all_projects is None:
                all_projects = [p.project_id for p in client.list_projects()]
            # fnmatchcase(name, pattern): the project id is the name.
            projects = [
                p for p in all_projects if fnmatchcase(p, project_pattern)
            ]

        for project in projects:
            datasets = [dataset_pattern]
            if _uses_wildcards(dataset_pattern):
                if project not in all_datasets:
                    all_datasets[project] = [
                        d.dataset_id for d in client.list_datasets(project)
                    ]
                datasets = [
                    d for d in all_datasets[project]
                    if fnmatchcase(d, dataset_pattern)
                ]

            for dataset in datasets:
                qualified_dataset = f"{project}.{dataset}"
                tables = [f"{qualified_dataset}.{table_pattern}"]
                if _uses_wildcards(table_pattern):
                    if qualified_dataset not in all_tables:
                        all_tables[qualified_dataset] = list(
                            client.list_tables(qualified_dataset))
                    tables = [
                        f"{qualified_dataset}.{t.table_id}"
                        for t in all_tables[qualified_dataset]
                        if fnmatchcase(t.table_id, table_pattern)
                    ]
                matching_tables += tables

    return matching_tables
コード例 #6
0
def delete_table(client: bigquery.Client, dataset_id: str, table_id: str):
    """
    Deletes the specified table in the given project:dataset

    The dataset's tables are listed first; when ``table_id`` is not among
    them a message is printed and nothing is deleted.

    Args:
        client: BQ API client; its ``project`` is the project the dataset is
            looked up in
        dataset_id: dataset containing the table
        table_id: table to be deleted

    Returns:
        None

    Examples:
        delete_table(client, 'my_dataset', 'my_table')
    """
    # Client.dataset() is deprecated; build the equivalent reference directly.
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    # The listing is only scanned once, so no intermediate list() is needed.
    existing_ids = {t.table_id for t in client.list_tables(dataset_ref)}

    if table_id not in existing_ids:
        print("THIS TABLE DOES NOT EXIST IN {}:{}".format(client.project, dataset_id))
        return

    client.delete_table(dataset_ref.table(table_id))
コード例 #7
0
def get_tables(project_id: str,
               client: Client,
               dataset_id: Optional[str] = None) -> Iterator[Table]:
    """
    Yield the BigQuery tables of a Google Cloud project.

    Args:
        project_id (str): ID of the project.
        client (Client): Client used for every lookup.
        dataset_id (Optional[str]): Dataset to restrict the search to.
            When falsy, every dataset listed for the project is visited.

    Yields:
        Table: The result of ``client.get_table`` for each listed table.
    """
    if dataset_id:
        references = [f"{project_id}.{dataset_id}"]
    else:
        # Lazily walk the project's datasets, one reference at a time.
        references = (listed.reference
                      for listed in client.list_datasets(project=project_id))

    for reference in references:
        resolved = client.get_dataset(reference)
        for item in client.list_tables(resolved):
            yield client.get_table(item)
コード例 #8
0
def create_table(client: bigquery.Client, dataset_id: str, table_id: str, schema: list):
    """
    Creates a table according to the given schema in the specified project:dataset

    The dataset's tables are listed first; when ``table_id`` is already among
    them a message is printed and nothing is created.

    Args:
        client: BQ API client; its ``project`` is the project the dataset is
            looked up in
        dataset_id: destination dataset
        table_id: table to be created
        schema: schema of the table to be created

    Returns:
        None

    Examples:
        create_table(client, 'my_dataset', 'my_table', my_schema)
    """
    # Client.dataset() is deprecated; build the equivalent reference directly.
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    # The listing is only scanned once, so no intermediate list() is needed.
    existing_ids = {t.table_id for t in client.list_tables(dataset_ref)}

    if table_id in existing_ids:
        print("THIS TABLE ALREADY EXISTS IN {}:{}".format(client.project, dataset_id))
        return

    client.create_table(bigquery.Table(dataset_ref.table(table_id), schema))
コード例 #9
0
def create_bq_table(table_name='CRY', dataset_name='price_data'):
    '''Create ``table_name`` in ``dataset_name`` unless it is already listed.

    The dataset is looked up in the default client's project. The table named
    ``'CRY'`` gets the schema with a ``market_cap`` column; every other name
    gets the schema with ``adjusted_close``, ``dividend_amount`` and
    ``split_coefficient``. Both schemas end with ``symbol`` and ``date``, and
    all columns are NULLABLE. Prints a message when the table already exists.
    '''
    client = Client()
    dataset_path = client.project + "." + dataset_name
    existing = [item.table_id for item in client.list_tables(dataset_path)]
    if table_name in existing:
        print("Table already exists")
        return

    # Only the FLOAT64 columns differ between the two layouts.
    if table_name == 'CRY':
        float_columns = (
            "open",
            "high",
            "low",
            "close",
            "volume",
            "market_cap",
        )
    else:
        float_columns = (
            "open",
            "high",
            "low",
            "close",
            "adjusted_close",
            "volume",
            "dividend_amount",
            "split_coefficient",
        )

    schema = [
        SchemaField(column, "FLOAT64", mode="NULLABLE")
        for column in float_columns
    ]
    schema.append(SchemaField("symbol", "STRING", mode="NULLABLE"))
    schema.append(SchemaField("date", "TIMESTAMP", mode="NULLABLE"))

    client.create_table(Table(dataset_path + "." + table_name, schema=schema))
コード例 #10
0
def _get_existing_table_names(client: bigquery.Client, dataset: str):
    """Return the ``table_id`` of every item ``client`` lists for ``dataset``."""
    listing = client.list_tables(dataset=dataset)
    return [entry.table_id for entry in listing]
コード例 #11
0
def get_bq_view_names(client: bigquery.Client, dataset: str):
    """Return the ``table_id`` of each listed item whose ``table_type`` is ``"VIEW"``."""
    views = (
        entry for entry in client.list_tables(dataset=dataset)
        if entry.table_type == "VIEW"
    )
    return [view.table_id for view in views]
コード例 #12
0
class BigQuery(BaseDb):
    """
        Client wrapper for a Google BigQuery database

        Kwargs:
            name : str - Canonical name for this instance
            creds_file : str - Path of the GOOGLE_APPLICATION_CREDENTIALS file to use;
                               when omitted, the BIGQUERY_CREDS_FILE environment
                               variable is consulted instead
            conn_kwargs : Overrides for individual connection defaults (project, etc);
                          keys that are not part of the defaults are ignored
    """

    def __init__(self, name=None, creds_file=None, **conn_kwargs):
        self._name = name
        self._bq_creds_file = (
            creds_file if creds_file is not None
            else os.getenv('BIGQUERY_CREDS_FILE', None)
        )

        # Start from a copy of the defaults and accept only recognised overrides
        self._conn_kwargs = dict(**BIGQUERY_DEFAULT_CONN_KWARGS)
        self._conn_kwargs.update({
            key: value
            for key, value in six.iteritems(conn_kwargs)
            if key in self._conn_kwargs
        })

    def __repr__(self):
        template = '<{db.__class__.__name__}({project})>'
        return template.format(db=self, project=self._conn_kwargs['project'])

    @property
    def name(self):
        """The name given at construction time"""
        return self._name

    @property
    def project(self):
        """The ``project`` entry of the connection kwargs"""
        return self._conn_kwargs['project']

    @project.setter
    def project(self, value):
        self._conn_kwargs['project'] = value

    def _connect(self):
        """
            Build the bigquery Client from the stored connection kwargs.

            When a creds file is configured and exists, it is exported as
            GOOGLE_APPLICATION_CREDENTIALS unless that variable is already set;
            a missing file only produces a warning.
        """
        creds_path = self._bq_creds_file
        if creds_path is not None:
            if not Path(creds_path).exists():
                _log.warning('Path set by creds file does not exist: %s', creds_path)
            else:
                # setdefault leaves an already exported value untouched
                os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', creds_path)
        self._conn = Client(**self._conn_kwargs)

    def _close(self):
        """
            Intentionally does nothing.

            NOTE(review): the original comment states that the bigquery Client has
            no close method and that BaseDb.close resets self._conn and
            self._connected - confirm against BaseDb.
        """
        return

    def _query(self, query_string):
        """Connect, submit the query and return the finished job's result"""
        self.connect()
        return self._conn.query(query_string).result()

    def query(self, query_string):
        """Run the query and return its result wrapped in a QueryResult"""
        from .result import QueryResult
        return QueryResult(self._query(query_string))

    def execute(self, query_string):
        """Run the query and discard its result"""
        self._query(query_string)

    def list_tables(self, dataset_id):
        """
            Return the names of the tables in a dataset

            Args:
                dataset_id : str - The dataset to inspect

            Returns:
                list of table names
        """
        self.connect()
        target = self._conn.dataset(dataset_id)
        listing = self._conn.list_tables(target)
        return [item.table_id for item in listing]

    def delete_table(self, dataset_id, table_id):
        """
            Remove a table from a dataset

            Args:
                dataset_id : str - The dataset containing the table to delete
                table_id : str - The table to delete

            Returns:
                None
        """
        self.connect()
        target_dataset = self._conn.dataset(dataset_id)
        self._conn.delete_table(target_dataset.table(table_id))
コード例 #13
0
def get_table_list(client: bq.Client, dataset_name: str) -> List[dict]:
    """Return everything ``client.list_tables`` yields for ``dataset_name`` as a list.

    NOTE(review): the return annotation says ``List[dict]``, but the elements
    are whatever ``client.list_tables`` yields - confirm the intended type.
    """
    return list(client.list_tables(dataset=dataset_name))