Example #1
0
def cleanup_datasets(bigquery_client: bigquery.Client):
    """Delete prefixed datasets whose name-encoded date is over a day old.

    Walks every dataset returned by ``bigquery_client.list_datasets()`` and
    deletes, together with its contents, each one whose ID starts with
    ``RESOURCE_PREFIX`` and for which ``resource_name_to_date`` yields a
    value earlier than 24 hours before the current (naive) UTC time.

    Args:
        bigquery_client: Client used to list and delete the datasets.
    """
    cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    for candidate in bigquery_client.list_datasets():
        name = candidate.dataset_id
        # Only names carrying our prefix are parsed for a date.
        if not name.startswith(RESOURCE_PREFIX):
            continue
        if resource_name_to_date(name) < cutoff:
            # not_found_ok=True is passed so a dataset that was already
            # removed does not abort the sweep.
            bigquery_client.delete_dataset(
                candidate, delete_contents=True, not_found_ok=True
            )
Example #2
0
def create_bq_dataset(dataset_name='price_data'):
    '''Create the dataset in the client's project if it does not exist yet.

    A default ``Client()`` is built, its datasets are listed, and when no
    listed dataset has the ID ``dataset_name`` a new dataset with location
    "US" is created. Otherwise "Dataset already exists" is printed.

    Args:
        dataset_name: Bare dataset ID (without the project part).
    '''
    client = Client()

    # The listing is requested without a project argument, so the IDs are
    # compared directly instead of being prefixed with client.project first.
    already_there = any(
        listed.dataset_id == dataset_name
        for listed in client.list_datasets()
    )
    if already_there:
        print("Dataset already exists")
        return

    # Dataset() given a string requires the fully-qualified
    # "project.dataset_id" form; a bare dataset ID raises ValueError.
    dataset = Dataset(f"{client.project}.{dataset_name}")
    dataset.location = "US"
    client.create_dataset(dataset)
Example #3
0
def list_datasets(client: bigquery.Client) -> list:
    """
    Lists the IDs of the datasets returned by the client.

    Args:
        client: BQ API client (default project defined in your
            GOOGLE_APPLICATION_CREDENTIALS)

    Returns:
        list: the ``dataset_id`` of every item yielded by
        ``client.list_datasets()``, in iteration order

    Examples:
        list_datasets(client)
    """
    # The comprehension consumes the iterator directly; wrapping it in
    # list() first only built a throwaway copy.
    return [d.dataset_id for d in client.list_datasets()]
Example #4
0
def main(prefixes):
    """Delete every dataset whose ID starts with one of ``prefixes``.

    Each prefix is inserted verbatim into a regular expression anchored at
    the start of the dataset ID, so regex metacharacters inside a prefix
    keep their regex meaning. Matching datasets are deleted together with
    their contents; a dataset that has vanished in the meantime is reported
    as "NOT FOUND" and skipped.

    Args:
        prefixes: Iterable of dataset-ID prefixes. When it is empty nothing
            is deleted.
    """
    prefixes = list(prefixes)
    if not prefixes:
        # An empty alternation compiles to the pattern '' which matches
        # every ID -- that would wipe all datasets in the project.
        print("No prefixes given; nothing to delete")
        return

    client = Client()

    pattern = re.compile('|'.join('^{}.*$'.format(prefix)
                                  for prefix in prefixes))

    # Take a full snapshot of the listing before any deletion starts.
    ds_items = list(client.list_datasets())
    for dataset in ds_items:
        ds_id = dataset.dataset_id
        if not pattern.match(ds_id):
            continue
        print("Deleting dataset: {}".format(ds_id))
        try:
            client.delete_dataset(dataset.reference, delete_contents=True)
        except NotFound:
            print("   NOT FOUND")
Example #5
0
def get_tables_matching_patterns(client: bigquery.Client,
                                 patterns: List[str]) -> List[str]:
    """Get BigQuery tables matching the provided patterns.

    Each pattern has the form ``project:dataset.table``. It is split on the
    first ":" and then on the first "."; an empty project part falls back to
    ``client.project`` and an empty dataset or table part falls back to
    ``"*"``. Parts for which ``_uses_wildcards`` is true are expanded with
    ``fnmatchcase`` against the projects / datasets / tables listed through
    ``client``; other parts are used literally without any lookup.

    Listings are cached for the duration of the call so that each project,
    dataset list and table list is fetched at most once.

    Args:
        client: BigQuery client used for the list calls.
        patterns: Patterns to expand.

    Returns:
        ``project.dataset.table`` strings, in pattern order. Overlapping
        patterns may yield the same table more than once.
    """
    all_projects = None  # lazily filled list of project IDs
    all_datasets = {}    # project ID -> list of dataset IDs
    all_tables = {}      # "project.dataset" -> list of table list items
    matching_tables = []

    for pattern in patterns:
        project_pattern, _, dataset_table = pattern.partition(":")
        dataset_pattern, _, table_pattern = dataset_table.partition(".")
        dataset_pattern = dataset_pattern or "*"
        table_pattern = table_pattern or "*"

        if _uses_wildcards(project_pattern):
            if all_projects is None:
                all_projects = [p.project_id for p in client.list_projects()]
            # fnmatchcase takes (name, pattern): the project ID is the name.
            projects = [
                p for p in all_projects if fnmatchcase(p, project_pattern)
            ]
        else:
            projects = [project_pattern or client.project]

        # The patterns are kept in their own variables: reusing one name for
        # both the pattern and the loop item would leak a resolved value from
        # one project iteration into the next.
        for project in projects:
            if _uses_wildcards(dataset_pattern):
                if project not in all_datasets:
                    all_datasets[project] = [
                        d.dataset_id for d in client.list_datasets(project)
                    ]
                dataset_ids = [
                    d for d in all_datasets[project]
                    if fnmatchcase(d, dataset_pattern)
                ]
            else:
                dataset_ids = [dataset_pattern]

            for dataset_id in dataset_ids:
                qualified_dataset = f"{project}.{dataset_id}"
                if _uses_wildcards(table_pattern):
                    if qualified_dataset not in all_tables:
                        all_tables[qualified_dataset] = list(
                            client.list_tables(qualified_dataset))
                    matching_tables += [
                        f"{qualified_dataset}.{t.table_id}"
                        for t in all_tables[qualified_dataset]
                        if fnmatchcase(t.table_id, table_pattern)
                    ]
                else:
                    matching_tables.append(
                        f"{qualified_dataset}.{table_pattern}")

    return matching_tables
Example #6
0
def get_tables(project_id: str,
               client: Client,
               dataset_id: Optional[str] = None) -> Iterator[Table]:
    """
    Gets BigQuery tables from a Google Cloud project.

    Args:
        project_id (str): ID of the project.
        client (Client): A Google Cloud Client instance.
        dataset_id (Optional[str]): The ID of the dataset.
            If falsy (e.g. `None`), tables are retrieved from every dataset
            listed for the project.

    Yields:
        Table: The result of `client.get_table` for each listed table.
    """
    if dataset_id:
        references = [f"{project_id}.{dataset_id}"]
    else:
        references = (listed.reference
                      for listed in client.list_datasets(project=project_id))

    # Resolve one dataset at a time so lookups stay interleaved with the
    # table listing, exactly as lazily as the caller consumes the results.
    for reference in references:
        resolved = client.get_dataset(reference)
        for table_item in client.list_tables(resolved):
            yield client.get_table(table_item)
Example #7
0
def cleanup_datasets(bq_client: bigquery.Client):
    """Delete every dataset that ``prefixer.should_cleanup`` selects.

    Each dataset returned by ``bq_client.list_datasets()`` has its ID passed
    to ``prefixer.should_cleanup``; those for which it returns a truthy value
    are deleted together with their contents.

    Args:
        bq_client: Client used to list and delete the datasets.
    """
    # Lazy filter: each dataset is checked right before it is deleted.
    doomed = (
        entry for entry in bq_client.list_datasets()
        if prefixer.should_cleanup(entry.dataset_id)
    )
    for entry in doomed:
        # not_found_ok=True is passed so an already-removed dataset does not
        # abort the sweep.
        bq_client.delete_dataset(
            entry, delete_contents=True, not_found_ok=True
        )
Example #8
0
 def validate_dataset(self):
     """Create ``self.dataset`` when it is not among the listed datasets.

     Lists the dataset IDs visible to a client bound to the
     ``investing-management`` project and calls ``create_bq_dataset`` when
     ``self.dataset`` is not one of them.
     """
     bq = Client(project='investing-management')
     known_ids = [entry.dataset_id for entry in bq.list_datasets()]
     if self.dataset in known_ids:
         return
     create_bq_dataset(dataset_name=self.dataset)