Example #1
    def publicize(self, mode="all"):
        """Changes IAM configuration to turn BigQuery dataset public.

        Args:
            mode (bool): Which dataset to create [prod|staging|all].
        """

        for m in self._loop_modes(mode):

            dataset = m["client"].get_dataset(m["id"])
            entries = dataset.access_entries

            entries.extend(
                [
                    bigquery.AccessEntry(
                        role="roles/bigquery.dataViewer",
                        entity_type="iamMember",
                        entity_id="allUsers",
                    ),
                    bigquery.AccessEntry(
                        role="roles/bigquery.metadataViewer",
                        entity_type="iamMember",
                        entity_id="allUsers",
                    ),
                    bigquery.AccessEntry(
                        role="roles/bigquery.user",
                        entity_type="iamMember",
                        entity_id="allUsers",
                    ),
                ]
            )
            dataset.access_entries = entries

            m["client"].update_dataset(dataset, ["access_entries"])
Example #2
def publish_udf(raw_udf, client, project_id, gcs_bucket, gcs_path, known_udfs):
    """Publish a specific UDF to BigQuery."""
    # create new dataset for UDF if necessary
    dataset = client.create_dataset(raw_udf.dataset, exists_ok=True)

    # set permissions for dataset, public for everyone
    entry = bigquery.AccessEntry("READER", "specialGroup",
                                 "allAuthenticatedUsers")
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries
    dataset = client.update_dataset(dataset, ["access_entries"])

    # transform temporary UDFs into persistent UDFs and publish them
    for definition in raw_udf.definitions:
        # Within a standard SQL function, references to other entities require
        # explicit project IDs

        for udf in known_udfs:
            definition = definition.replace(udf, f"`{project_id}`.{udf}")

        # adjust paths for dependencies stored in GCS
        query = OPTIONS_LIB_RE.sub(
            fr'library = "gs://{gcs_bucket}/{gcs_path}\1"', definition)

        client.query(query).result()
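OPTIONS_LIB_RE is defined elsewhere in the source module. A plausible definition, assuming it captures the trailing file name of a GCS library path so the substitution above can re-point it at the publish bucket:

import re

# Hypothetical: matches `library = "gs://<bucket>/<path>/<file>"` and captures
# "/<file>" as group 1, matching the `\1` backreference used above.
OPTIONS_LIB_RE = re.compile(r'library = "gs://[^"]+(/[^"/]+)"')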
Example #3
def update_dataset_access(dataset_id):

    # [START bigquery_update_dataset_access]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # Make an API request.

    entry = bigquery.AccessEntry(
        role="READER",
        entity_type="userByEmail",
        entity_id="*****@*****.**",
    )

    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset,
                                    ["access_entries"])  # Make an API request.

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print("Updated dataset '{}' with modified user permissions.".format(
        full_dataset_id))
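To confirm the grant took effect, one option is to re-fetch the dataset and filter its entries; a small sketch under the same assumptions as the example above:

dataset = client.get_dataset(dataset_id)  # Re-fetch after the update.
readers = [
    entry.entity_id
    for entry in dataset.access_entries
    if entry.role == "READER" and entry.entity_type == "userByEmail"
]
print("READER grants by email: {}".format(readers))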
Example #4
def test_update_dataset_multiple_properties(client, to_delete):
    """Update a dataset's metadata."""
    DATASET_ID = 'update_dataset_multiple_properties_%d' % (_millis(), )
    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
    dataset.description = ORIGINAL_DESCRIPTION
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START update_dataset_multiple_properties]
    assert dataset.description == ORIGINAL_DESCRIPTION
    assert dataset.default_table_expiration_ms is None
    entry = bigquery.AccessEntry(role='READER',
                                 entity_type='domain',
                                 entity_id='example.com')
    assert entry not in dataset.access_entries
    ONE_DAY_MS = 24 * 60 * 60 * 1000  # in milliseconds
    dataset.description = UPDATED_DESCRIPTION
    dataset.default_table_expiration_ms = ONE_DAY_MS
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(
        dataset,
        ['description', 'default_table_expiration_ms', 'access_entries'],
    )  # API request

    assert dataset.description == UPDATED_DESCRIPTION
    assert dataset.default_table_expiration_ms == ONE_DAY_MS
    assert entry in dataset.access_entries
Example #5
def bq_update_access_to_dataset(dataset_id, role, entity_type, entity_id):
    '''
        Update access control for a BigQuery dataset.

        USAGE:
        bq_update_access_to_dataset('ztest1','READER','userByEmail','*****@*****.**')

        Required Permissions:
        bigquery.dataOwner
        bigquery.admin
    '''
    try:
        client = bigquery.Client()
        dataset = client.get_dataset(client.dataset(dataset_id))

        entry = bigquery.AccessEntry(
            role=role,  # 'READER', 'WRITER', 'OWNER'
            entity_type=entity_type,  # 'userByEmail', 'groupByEmail', 'domain', 'specialGroup', 'view'
            entity_id=entity_id,  # user or resource to grant access to
        )

        assert entry not in dataset.access_entries
        entries = list(dataset.access_entries)
        entries.append(entry)
        dataset.access_entries = entries

        dataset = client.update_dataset(dataset,
                                        ['access_entries'])  # API request

        assert entry in dataset.access_entries

    except Exception as e:
        print('[ ERROR ] {}'.format(e))
Example #6
def share_dataset_all_users_read(bigquery_client, full_dataset_name):
    # Older client releases validate entity_type, so extend the allowed
    # ENTITY_TYPES to include 'iamMember' before building the entry.
    bigquery.AccessEntry.ENTITY_TYPES = ["userByEmail", "groupByEmail", "domain", "specialGroup", "view", "iamMember"]

    role = 'READER'
    entity_type = 'iamMember'
    entity_id = 'allUsers'

    dataset = bigquery_client.get_dataset(full_dataset_name)
    entries = list(dataset.access_entries)
    is_shared = False
    for entry in entries:
        if entry.role == role and entry.entity_type == entity_type and entry.entity_id == entity_id:
            is_shared = True
            break

    if not is_shared:
        entry = bigquery.AccessEntry(
            role=role,
            entity_type=entity_type,
            entity_id=entity_id,
        )
        entries.append(entry)
        dataset.access_entries = entries
        dataset = bigquery_client.update_dataset(dataset, ["access_entries"])
        logging.info('Updated dataset permissions')
    else:
        logging.info('The dataset is already shared')
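The ENTITY_TYPES patch above suggests this was written against an older client release that validated entity_type; on newer releases the patch should be unnecessary. The duplicate check can also collapse to a single expression, as a sketch:

is_shared = any(
    entry.role == role
    and entry.entity_type == entity_type
    and entry.entity_id == entity_id
    for entry in entries
)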
Example #7
    def authorize(self, shared_dataset_id, shared_view_id, source_dataset_id):
        client = self.get_client()
        shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id))
        # Fetch the source dataset so its existing access entries are kept.
        source_dataset = client.get_dataset(client.dataset(source_dataset_id))
        view = bigquery.Table(shared_dataset.table(shared_view_id))
        access_entries = source_dataset.access_entries
        access_entry = bigquery.AccessEntry(None, 'view',
                                            view.reference.to_api_repr())
        access_entries.append(access_entry)
        source_dataset.access_entries = access_entries
        client.update_dataset(source_dataset, ['access_entries'])
Example #8
def publish_routine(raw_routine, client, project_id, gcs_bucket, gcs_path,
                    known_udfs, is_public):
    """Publish a specific routine to BigQuery."""
    if is_public:
        # create new dataset for routine if necessary
        dataset = client.create_dataset(raw_routine.dataset, exists_ok=True)

        # set permissions for dataset, public for everyone
        entry = bigquery.AccessEntry("READER", "specialGroup",
                                     "allAuthenticatedUsers")
        entries = list(dataset.access_entries)
        entries.append(entry)
        dataset.access_entries = entries
        dataset = client.update_dataset(dataset, ["access_entries"])

    # transform temporary UDFs into persistent UDFs and publish them
    for definition in raw_routine.definitions:
        # Within a standard SQL function, references to other entities require
        # explicit project IDs
        for udf in set(known_udfs):
            # ensure UDF definitions are not replaced twice as would be the case for
            # `mozfun`.stats.mode_last and `mozfun`.stats.mode_last_retain_nulls
            # since one name is a substring of the other
            definition = definition.replace(f"`{project_id}`.{udf}", udf)
            definition = definition.replace(f"{project_id}.{udf}", udf)
            definition = definition.replace(udf, f"`{project_id}`.{udf}")

        # adjust paths for dependencies stored in GCS
        query = OPTIONS_LIB_RE.sub(
            fr'library = "gs://{gcs_bucket}/{gcs_path}\1"', definition)

        # add UDF descriptions
        if raw_routine.filepath not in SKIP and not raw_routine.is_stored_procedure:
            # descriptions need to be escaped since quotation marks and other
            # characters, such as \x01, will make the query invalid otherwise
            escaped_description = json.dumps(str(raw_routine.description))
            query = OPTIONS_RE.sub(
                f"OPTIONS(description={escaped_description},", query)

            if "OPTIONS(" not in query and query[-1] == ";":
                query = query[:-1] + f"OPTIONS(description={escaped_description});"

        print(f"Publish {raw_routine.name}")
        client.query(query).result()
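OPTIONS_RE is likewise defined elsewhere in the source module. A plausible definition, assuming it anchors on the opening of the OPTIONS clause so the description can be spliced in as the first option:

import re

# Hypothetical: matches the opening of an OPTIONS clause; the substitution
# above rewrites it to `OPTIONS(description=...,`.
OPTIONS_RE = re.compile(r"OPTIONS\(")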
Example #9
def test_update_dataset_access(client, to_delete):
    """Update a dataset's metadata."""
    DATASET_ID = 'update_dataset_access_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_update_dataset_access]
    entry = bigquery.AccessEntry(role='READER',
                                 entity_type='userByEmail',
                                 entity_id='*****@*****.**')
    assert entry not in dataset.access_entries
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ['access_entries'])  # API request

    assert entry in dataset.access_entries
Example #10
def add_bq_iam_member(project, db_name):
    client = bigquery.Client()
    name = project + "." + db_name
    dataset = client.get_dataset(name)

    entry = bigquery.AccessEntry(
        role="OWNER",
        entity_type="userByEmail",
        entity_id="*****@*****.**",
    )

    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ["access_entries"])

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print("Updated dataset '{}' with modified user permissions.".format(
        full_dataset_id))
Example #11
def _share_views(views_project, views_dataset, origin_project, origin_dataset,
                 tables):
    client_views_project = bigquery.Client(views_project)
    client_origin_project = bigquery.Client(origin_project)

    views_dataset_obj = bigquery.Dataset(
        client_views_project.dataset(views_dataset))
    # Fetch the origin dataset so its existing access entries are kept.
    origin_dataset_obj = client_origin_project.get_dataset(
        client_origin_project.dataset(origin_dataset))

    access_entries = origin_dataset_obj.access_entries

    for table in tables:
        view = bigquery.Table(views_dataset_obj.table(table))
        access_entries.append(
            bigquery.AccessEntry(None, 'view', view.reference.to_api_repr()))

    origin_dataset_obj.access_entries = access_entries
    origin_dataset_obj = client_origin_project.update_dataset(
        origin_dataset_obj, ['access_entries'])
Example #12
def grant_access(override_values={}):
    # [START bigquery_grant_view_access]
    from google.cloud import bigquery

    client = bigquery.Client()

    # To use a view, the analyst requires ACLs to both the view and the source
    # table. Create an authorized view to allow an analyst to use a view
    # without direct access permissions to the source table.
    view_dataset_id = "my-project.my_view_dataset"
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    view_dataset_id = override_values.get("view_dataset_id", view_dataset_id)
    # [START bigquery_grant_view_access]
    # Make an API request to get the view dataset ACLs.
    view_dataset = client.get_dataset(view_dataset_id)

    analyst_group_email = "*****@*****.**"
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    analyst_group_email = override_values.get("analyst_group_email",
                                              analyst_group_email)
    # [START bigquery_grant_view_access]
    access_entries = view_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email))
    view_dataset.access_entries = access_entries

    # Make an API request to update the ACLs property of the view dataset.
    view_dataset = client.update_dataset(view_dataset, ["access_entries"])
    print(f"Access to view: {view_dataset.access_entries}")

    # Group members of "*****@*****.**" now have access to the view,
    # but they require access to the source table to use it. To remove this
    # restriction, authorize the view to access the source dataset.
    source_dataset_id = "my-project.my_source_dataset"
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    source_dataset_id = override_values.get("source_dataset_id",
                                            source_dataset_id)
    # [START bigquery_grant_view_access]
    # Make an API request to set the source dataset ACLs.
    source_dataset = client.get_dataset(source_dataset_id)

    view_reference = {
        "projectId": "my-project",
        "datasetId": "my_view_dataset",
        "tableId": "my_authorized_view",
    }
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    view_reference = override_values.get("view_reference", view_reference)
    # [START bigquery_grant_view_access]
    access_entries = source_dataset.access_entries
    access_entries.append(bigquery.AccessEntry(None, "view", view_reference))
    source_dataset.access_entries = access_entries

    # Make an API request to update the ACLs property of the source dataset.
    source_dataset = client.update_dataset(source_dataset, ["access_entries"])
    print(f"Access to source: {source_dataset.access_entries}")
    # [END bigquery_grant_view_access]
    return view_dataset, source_dataset
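A possible invocation, assuming the datasets and view already exist (the ids and email are placeholders mirroring the defaults inside the function):

view_ds, source_ds = grant_access(
    override_values={
        "view_dataset_id": "my-project.my_view_dataset",
        "source_dataset_id": "my-project.my_source_dataset",
        "analyst_group_email": "*****@*****.**",
        "view_reference": {
            "projectId": "my-project",
            "datasetId": "my_view_dataset",
            "tableId": "my_authorized_view",
        },
    }
)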
Example #13
from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set dataset_id to the ID of the dataset to fetch.
# dataset_id = 'your-project.your_dataset'

dataset = client.get_dataset(dataset_id)  # Make an API request.

entry = bigquery.AccessEntry(
    role="READER",
    entity_type="userByEmail",
    entity_id="*****@*****.**",
)

entries = list(dataset.access_entries)
entries.append(entry)
dataset.access_entries = entries

dataset = client.update_dataset(dataset,
                                ["access_entries"])  # Make an API request.

full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
print("Updated dataset '{}' with modified user permissions.".format(
    full_dataset_id))
Example #14
    # Create the read-only dataset if it does not already exist
    m = re.search('(.*)@.*', group_email)
    shared_dataset_id = m.group(1).replace('.', '_')
    shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id +
                                                     '_ro'))
    shared_dataset.location = 'EU'
    try:
        shared_dataset = client.create_dataset(shared_dataset)  # API request
    except Exception as e:
        logging.error(str(e))

    # Authorize the group to access the shared read-only dataset
    try:
        access_entries = shared_dataset.access_entries
        access_entries.append(
            bigquery.AccessEntry('READER', 'groupByEmail', group_email))
        shared_dataset.access_entries = access_entries
        shared_dataset = client.update_dataset(
            shared_dataset, ['access_entries'])  # API request
    except Exception as e:
        logging.error(str(e))

    # Create the shared view in the new dataset, deleting the view first if
    # it already exists
    try:
        table = client.get_table(shared_dataset.table(shared_view_id))
        if table.view_query is not None:
            client.delete_table(shared_dataset.table(shared_view_id))
    except Exception as e:
        logging.error(str(e))

    view = bigquery.Table(shared_dataset.table(shared_view_id))
Example #15
    csv_reader = reader(read_obj)
    next(csv_reader, None)  # skip the headers
    # Iterate over each row in the csv using reader object
    for row in csv_reader:
        # row variable is a list that represents a row in csv
        dataset_id = row[1].strip()
        emailid = row[2].strip()
        role = row[3].strip()
        print(emailid)
        print(dataset_id)
        print(role)

        dataset = client.get_dataset(dataset_id)  # Make an API request.

        entry = bigquery.AccessEntry(
            role=role,  # e.g. 'READER'
            entity_type="userByEmail",
            entity_id=emailid,  # e.g. "*****@*****.**"
        )

        entries = list(dataset.access_entries)
        entries.append(entry)

        dataset.access_entries = entries
        dataset = client.update_dataset(
            dataset, ["access_entries"])  # Make an API request.

        full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
        print("Updated dataset '{}' with modified user permissions.".format(
            full_dataset_id))
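The fragment assumes client, reader and read_obj are already set up. A minimal preamble consistent with the column indices used above ("access_grants.csv" is a placeholder):

from csv import reader

from google.cloud import bigquery

client = bigquery.Client()

# Each row carries the dataset id, email and role in columns 1-3.
with open("access_grants.csv") as read_obj:
    csv_reader = reader(read_obj)
    next(csv_reader, None)  # skip the headers
    # ... per-row grant logic from the example goes here ...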
Example #16
def run_authorized_view_tutorial(override_values={}):
    # Note to user: This is a group email for testing purposes. Replace with
    # your own group email address when running this code.
    analyst_group_email = "*****@*****.**"

    # [START bigquery_authorized_view_tutorial]
    # Create a source dataset
    # [START bigquery_avt_create_source_dataset]
    from google.cloud import bigquery

    client = bigquery.Client()
    source_dataset_id = "github_source_data"
    source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id)

    # [END bigquery_authorized_view_tutorial]
    # [END bigquery_avt_create_source_dataset]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    source_dataset_id = override_values.get("source_dataset_id",
                                            source_dataset_id)
    source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id)
    # [START bigquery_authorized_view_tutorial]
    # [START bigquery_avt_create_source_dataset]

    source_dataset = bigquery.Dataset(source_dataset_id_full)
    # Specify the geographic location where the dataset should reside.
    source_dataset.location = "US"
    source_dataset = client.create_dataset(source_dataset)  # API request
    # [END bigquery_avt_create_source_dataset]

    # Populate a source table
    # [START bigquery_avt_create_source_table]
    source_table_id = "github_contributors"
    job_config = bigquery.QueryJobConfig()
    job_config.destination = source_dataset.table(source_table_id)
    sql = """
        SELECT commit, author, committer, repo_name
        FROM `bigquery-public-data.github_repos.commits`
        LIMIT 1000
    """
    query_job = client.query(
        sql,
        # Location must match that of the dataset(s) referenced in the query
        # and of the destination table.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    query_job.result()  # Waits for the query to finish
    # [END bigquery_avt_create_source_table]

    # Create a separate dataset to store your view
    # [START bigquery_avt_create_shared_dataset]
    shared_dataset_id = "shared_views"
    shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id)

    # [END bigquery_authorized_view_tutorial]
    # [END bigquery_avt_create_shared_dataset]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    shared_dataset_id = override_values.get("shared_dataset_id",
                                            shared_dataset_id)
    shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id)
    # [START bigquery_authorized_view_tutorial]
    # [START bigquery_avt_create_shared_dataset]

    shared_dataset = bigquery.Dataset(shared_dataset_id_full)
    shared_dataset.location = "US"
    shared_dataset = client.create_dataset(shared_dataset)  # API request
    # [END bigquery_avt_create_shared_dataset]

    # Create the view in the new dataset
    # [START bigquery_avt_create_view]
    shared_view_id = "github_analyst_view"
    view = bigquery.Table(shared_dataset.table(shared_view_id))
    sql_template = """
        SELECT
            commit, author.name as author,
            committer.name as committer, repo_name
        FROM
            `{}.{}.{}`
    """
    view.view_query = sql_template.format(client.project, source_dataset_id,
                                          source_table_id)
    view = client.create_table(view)  # API request
    # [END bigquery_avt_create_view]

    # Assign access controls to the dataset containing the view
    # [START bigquery_avt_shared_dataset_access]
    # analyst_group_email = '*****@*****.**'
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email))
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(shared_dataset,
                                           ["access_entries"])  # API request
    # [END bigquery_avt_shared_dataset_access]

    # Authorize the view to access the source dataset
    # [START bigquery_avt_source_dataset_access]
    access_entries = source_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry(None, "view", view.reference.to_api_repr()))
    source_dataset.access_entries = access_entries
    source_dataset = client.update_dataset(source_dataset,
                                           ["access_entries"])  # API request
Example #17
def test_manage_views(client, to_delete):
    project = client.project
    source_dataset_id = "source_dataset_{}".format(_millis())
    source_dataset_ref = client.dataset(source_dataset_id)
    source_dataset = bigquery.Dataset(source_dataset_ref)
    source_dataset = client.create_dataset(source_dataset)
    to_delete.append(source_dataset)

    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    job_config.skip_leading_rows = 1
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    source_table_id = "us_states"
    load_job = client.load_table_from_uri(
        uri, source_dataset.table(source_table_id), job_config=job_config
    )
    load_job.result()

    shared_dataset_id = "shared_dataset_{}".format(_millis())
    shared_dataset_ref = client.dataset(shared_dataset_id)
    shared_dataset = bigquery.Dataset(shared_dataset_ref)
    shared_dataset = client.create_dataset(shared_dataset)
    to_delete.append(shared_dataset)

    # [START bigquery_create_view]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = 'my-project'
    # source_dataset_id = 'my_source_dataset'
    # source_table_id = 'us_states'
    # shared_dataset_ref = client.dataset('my_shared_dataset')

    # This example shows how to create a shared view of a source table of
    # US States. The source table contains all 50 states, while the view will
    # contain only states with names starting with 'W'.
    view_ref = shared_dataset_ref.table("my_shared_view")
    view = bigquery.Table(view_ref)
    sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"'
    view.view_query = sql_template.format(project, source_dataset_id, source_table_id)
    view = client.create_table(view)  # API request

    print("Successfully created view at {}".format(view.full_table_id))
    # [END bigquery_create_view]

    # [START bigquery_update_view_query]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = 'my-project'
    # source_dataset_id = 'my_source_dataset'
    # source_table_id = 'us_states'
    # shared_dataset_ref = client.dataset('my_shared_dataset')

    # This example shows how to update a shared view of a source table of
    # US States. The view's query will be updated to contain only states with
    # names starting with 'M'.
    view_ref = shared_dataset_ref.table("my_shared_view")
    view = bigquery.Table(view_ref)
    sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"'
    view.view_query = sql_template.format(project, source_dataset_id, source_table_id)
    view = client.update_table(view, ["view_query"])  # API request
    # [END bigquery_update_view_query]

    # [START bigquery_get_view]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # shared_dataset_id = 'my_shared_dataset'

    view_ref = client.dataset(shared_dataset_id).table("my_shared_view")
    view = client.get_table(view_ref)  # API Request

    # Display view properties
    print("View at {}".format(view.full_table_id))
    print("View Query:\n{}".format(view.view_query))
    # [END bigquery_get_view]
    assert view.view_query is not None

    analyst_group_email = "*****@*****.**"
    # [START bigquery_grant_view_access]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    # Assign access controls to the dataset containing the view
    # shared_dataset_id = 'my_shared_dataset'
    # analyst_group_email = '*****@*****.**'
    shared_dataset = client.get_dataset(
        client.dataset(shared_dataset_id)
    )  # API request
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email)
    )
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(
        shared_dataset, ["access_entries"]
    )  # API request

    # Authorize the view to access the source dataset
    # project = 'my-project'
    # source_dataset_id = 'my_source_dataset'
    source_dataset = client.get_dataset(
        client.dataset(source_dataset_id)
    )  # API request
    view_reference = {
        "projectId": project,
        "datasetId": shared_dataset_id,
        "tableId": "my_shared_view",
    }
    access_entries = source_dataset.access_entries
    access_entries.append(bigquery.AccessEntry(None, "view", view_reference))
    source_dataset.access_entries = access_entries
    source_dataset = client.update_dataset(
        source_dataset, ["access_entries"]
    )  # API request
Example #18
from google.cloud import bigquery

project_id = "PROJECT_ID"
dataset_id = "DATASET_NAME"
group_name = "*****@*****.**"
role = "READER"

client = bigquery.Client(project=project_id)

dataset_info = client.get_dataset(client.dataset(dataset_id))

access_entries = dataset_info.access_entries
access_entries.append(
        bigquery.AccessEntry(role, "groupByEmail", group_name)
)

dataset_info.access_entries = access_entries
dataset_info = client.update_dataset(
    dataset_info, ['access_entries'])
Example #19
    def execute(self, data):

        #setup
        client = self.client
        bigquery = self.bigquery
        datetime = self.datetime
        pytz = self.pytz
        time = self.time
        name = data.get("titleName") or "My_Target_Table"
        emails = data.get("emails") or ["*****@*****.**"]
        query = data.get("query")
        source_url = data.get("sourceURL") or \
            "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
        table = ""
        #

        # create a dataset first if needed
        dataset_main = self.make_dataset()
        table_id = "{}.{}".format(dataset_main[0], name)
        #

        #create a table if needed
        table = self.make_table(table_id, "load")
        #

        # create a view
        if (self.env.get("create_view")):
            try:
                view_id = "{}.{}".format(dataset_main[1], "My_View")
                source_id = table_id
                view = bigquery.Table(view_id)

                view.view_query = query or f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'W%'"

                # Make an API request to create the view.
                view = client.create_table(view)
                return f"Created {view.table_type}: {str(view.reference)}"
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # create an authorized view
        elif (self.env.get("create_authorized_view")):
            try:

                view_dataset_id = dataset_main[1]
                view_dataset = client.get_dataset(view_dataset_id)

                access_entries = view_dataset.access_entries
                for email in emails:
                    access_entries.append(
                        bigquery.AccessEntry("READER", "userByEmail", email))
                view_dataset.access_entries = access_entries

                view_dataset = client.update_dataset(view_dataset,
                                                     ["access_entries"])

                source_dataset_id = dataset_main[0]
                source_dataset = client.get_dataset(source_dataset_id)

                view_reference = {
                    "projectId": client.project,
                    "datasetId": self.dataset_names[0],
                    "tableId": name,
                }
                access_entries = source_dataset.access_entries
                access_entries.append(
                    bigquery.AccessEntry(None, "view", view_reference))
                source_dataset.access_entries = access_entries

                # Make an API request to update the ACLs property of the source dataset.
                source_dataset = client.update_dataset(source_dataset,
                                                       ["access_entries"])
                emailList = ""
                for email in emails:
                    emailList += email + ", "
                return f"""
                Access to view : {emailList}, and the view has access to the source table, 
                which means who has access can use the view
                """
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # list views
        elif (self.env.get("list_views")):
            try:
                dataset_id = dataset_main[0]
                for view in ["view_1", "view_2", "view_3"]:
                    view_id = "{}.{}".format(dataset_main[0], view)
                    source_id = table_id
                    view = bigquery.Table(view_id)

                    view.view_query = query or f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'W%'"

                    # Make an API request to create the view.
                    view = client.create_table(view)
                tables = client.list_tables(dataset_id)  # Make an API request.

                print("Tables contained in '{}':".format(dataset_id))
                views = ""
                for table in tables:
                    if (table.table_type == "VIEW"):
                        views += table.table_id + " ,"
                return "List of views in the dataset {}".format(views)
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        # update view query
        elif (self.env.get("update_views")):
            try:
                view_id = "{}.{}".format(dataset_main[1], "My_View")
                source_id = table_id
                view = bigquery.Table(view_id)

                view.view_query = f"SELECT name FROM `{source_id}` WHERE name LIKE 'M%'"

                view = client.update_table(view, ["view_query"])
                return f"Updated {view.table_type}: {str(view.reference)}"
            except BaseException as e:
                print('my custom error\n')
                print(e.__class__.__name__)
                print('\n')
                print(e)
                return 'an error occurred; check the output from the backend'
        #

        return "Check the backend env dictionary you did set it so the backend didnt do anything"
Example #20
    def execute(self, name):
        # create a dataset
        if (self.env.get("create")):
            dataset_id = self.make_dataset_id(name)
            dataset_init = bigquery.Dataset(dataset_id)
            dataset_init.location = "US"  # multi-region
            try:
                if (name == ""):
                    raise IndexError
                dataset = client.create_dataset(dataset_init, timeout=30)
                return "Created dataset {}.{}".format(client.project,
                                                      dataset.dataset_id)
            except IndexError:
                return "Please input a name"
            except BaseException as e:
                print("\nlook here\n")
                print(e)
                return "Dataset already exists choose another name"
        #

        # copy a dataset
        elif (self.env.get("copy")):
            transfer_client = bigquery_datatransfer.DataTransferServiceClient()

            # create a source dataset
            source_project_id = client.project
            source_dataset_id = "my_source_dataset"
            dataset_id = self.make_dataset_id(source_dataset_id)
            dataset_init = bigquery.Dataset(dataset_id)
            try:
                dataset = client.create_dataset(dataset_init, timeout=30)
            except BaseException:
                pass  # the source dataset may already exist
            #

            try:
                destination_project_id = client.project
                destination_dataset_id = name
                if (name == ""):
                    raise IndexError
                transfer_config = bigquery_datatransfer.TransferConfig(
                    destination_dataset_id=destination_dataset_id,
                    display_name="Your Dataset Copy Name",
                    data_source_id="cross_region_copy",
                    params={
                        "source_project_id": source_project_id,
                        "source_dataset_id": source_dataset_id,
                    },
                    # schedule="from ",
                )
                transfer_config = transfer_client.create_transfer_config(
                    parent=transfer_client.common_project_path(
                        destination_project_id),
                    transfer_config=transfer_config,
                )
                return "Created transfer config: {}".format(
                    transfer_config.name)
            except BaseException as e:
                print("\nlook here\n")
                print(e)
                return "Was not able to make a copy mabye check the terminal where the python server is running"
        #

        # access control
        elif (self.env.get("access_control")):
            try:
                dataset_id = self.make_dataset_id(name)
                dataset = client.get_dataset(dataset_id)
                entry = bigquery.AccessEntry(
                    role="READER",
                    entity_type="userByEmail",
                    entity_id="*****@*****.**",
                )

                entries = list(dataset.access_entries)
                entries.append(entry)
                dataset.access_entries = entries
                dataset = client.update_dataset(dataset, ["access_entries"])
                full_dataset_id = "{}.{}".format(dataset.project,
                                                 dataset.dataset_id)
                return "Updated dataset '{}' with modified user permissions.".format(
                    full_dataset_id)
            except BaseException as e:
                print("\nlook here\n")
                print(e)
                return "An error occured"
        #

        # list all datasets
        elif (self.env.get("list")):
            try:
                datasets = list(client.list_datasets())
                project = client.project
                value = "datasets in project {}".format(project)
                if datasets:
                    for dataset in datasets:
                        value += "\n{}".format(dataset.dataset_id)
                    return value
                else:
                    return "{} project does not contain any datasets.".format(
                        project)
            except BaseException as e:
                print("\nlook here\n")
                print(e)
                return "An error occured"
        #

        # metadata
        elif (self.env.get("metadata")):
            value = "Metadata:\n"
            try:
                # get the dataset friendly name
                dataset_id = self.make_dataset_id(name)
                dataset = client.get_dataset(dataset_id)
                full_dataset_id = "{}.{}".format(dataset.project,
                                                 dataset.dataset_id)
                value += "{}: {}\n".format("Friendly Name",
                                           dataset.friendly_name)
                #

                # more properties
                value += "{}: {}\n".format("Description", dataset.description)
                value += "Labels:\n"
                labels = dataset.labels
                if labels:
                    for label, label_value in labels.items():
                        value += "\n{}: {}".format(label, label_value)
                else:
                    value += "\tDataset has no labels defined.\n"
                #

                # View tables
                value += "Tables:"
                tables = list(client.list_tables(dataset))
                if tables:
                    for table in tables:
                        value += "\n{}:".format(table.table_id)
                else:
                    value += "\tDataset does not have tables.\n"
                #

                return value

            except BaseException as e:
                print("\nlook here\n")
                print(e)
                return "An error occured"
        #

        # update dataset
        elif (self.env.get("update")):

            value = ""
            try:
                # update dataset description
                dataset_id = self.make_dataset_id(name)
                dataset = client.get_dataset(
                    dataset_id)  # Make an API request.
                print(dataset)
                dataset.description = "Updated description."
                dataset = client.update_dataset(
                    dataset, ["description"])  # Make an API request.

                full_dataset_id = "{}.{}".format(dataset.project,
                                                 dataset.dataset_id)
                value += "Updated dataset '{}' with description '{}'.\n".format(
                    full_dataset_id, dataset.description)
                #

                # update default table expiration
                dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000  # 24 hours in milliseconds
                dataset = client.update_dataset(
                    dataset, ["default_table_expiration_ms"])
                value += "Updated dataset '{}' with table expiration '{}'.\n".format(
                    full_dataset_id, dataset.default_table_expiration_ms)
                #

                # update default partition expiration
                dataset.default_partition_expiration_ms = 24 * 60 * 60 * 1000  # 24 hours in milliseconds
                dataset = client.update_dataset(
                    dataset, ["default_partition_expiration_ms"])
                value += "Updated dataset '{}' with table partition expiration '{}'.\n".format(
                    full_dataset_id, dataset.default_partition_expiration_ms)
                #
                return value

            except IndexError:
                return "Provide the dataset name"
            except BaseException as e:
                print("\nlook here\n")
                print(e)
                return "An error occured"
        #

        # delete dataset
        elif (self.env.get("delete")):
            try:
                dataset_id = self.make_dataset_id(name)
                client.delete_dataset(dataset_id,
                                      delete_contents=True,
                                      not_found_ok=False)

                return "Deleted dataset '{}'.".format(dataset_id)
            except IndexError:
                return "Provide the dataset name"
            except BaseException as e:
                print("\nlook here\n")
                print(e.__class__.__name__)
                if (e.__class__.__name__ == "NotFound"):
                    return "Could not find dataset with name {} to delete".format(
                        dataset_id)
                return "An error occured"
Example #21
def update_datasets(**kwargs):
    """
    Updates datasets in Big Query.
    :param bq_client: Target Big Query Client (type:google.cloud.bigquery.client.Client)
    :param project_id: GCP Project_Id (type:str)
    :param datasets: List of Datasets (type:list)
    :param properties: Dataset Properties (type:dict)
    :return dataset_updation_flag: {0:SUCCESS, 1:FAIL} (type:int)
    """
    bq_client = kwargs.get("bq_client")
    project_id = kwargs.get("project_id")
    datasets = kwargs.get("datasets", [])
    properties = kwargs.get("properties", {})
    dataset_updation_flag = 0
    try:
        for dataset_id in datasets:
            dataset_id = project_id + "." + dataset_id
            try:
                dataset = bq_client.get_dataset(dataset_id)
                for k1, v1 in properties.items():
                    if v1:
                        if k1 == "description":
                            dataset.description = v1
                            dataset = bq_client.update_dataset(dataset, ["description"])
                        elif k1 == "default_table_expiration_ms":
                            dataset.default_table_expiration_ms = v1
                            dataset = bq_client.update_dataset(
                                dataset, ["default_table_expiration_ms"]
                            )
                        elif k1 == "labels":
                            dataset.labels = v1
                            dataset = bq_client.update_dataset(dataset, ["labels"])
                        elif k1 == "access_controls":
                            v1_vals = v1.values()
                            if None in v1_vals or any(
                                str(elem).isspace() for elem in v1_vals
                            ):
                                pass
                            else:
                                entry = bigquery.AccessEntry(
                                    role=properties["access_controls"]["role"],
                                    entity_type=properties["access_controls"][
                                        "entity_type"
                                    ],
                                    entity_id=properties["access_controls"][
                                        "entity_id"
                                    ],
                                )
                                entries = list(dataset.access_entries)
                                entries.append(entry)
                                dataset.access_entries = entries
                                dataset = bq_client.update_dataset(
                                    dataset, ["access_entries"]
                                )
            except NotFound:
                print("Dataset {} does not exist.".format(dataset_id))
                dataset_updation_flag = dataset_updation_flag + 1
    except Exception as e:
        print("Exception occurred: {}".format(e))
        dataset_updation_flag = dataset_updation_flag + 1
    finally:
        return dataset_updation_flag
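A possible invocation, assuming an authenticated client (all values are placeholders):

from google.cloud import bigquery

status = update_datasets(
    bq_client=bigquery.Client(),
    project_id="my-project",
    datasets=["dataset_a", "dataset_b"],
    properties={
        "description": "Managed by the access-control job",
        "default_table_expiration_ms": 24 * 60 * 60 * 1000,  # 24 hours
        "labels": {"env": "prod"},
        "access_controls": {
            "role": "READER",
            "entity_type": "groupByEmail",
            "entity_id": "*****@*****.**",
        },
    },
)
print("0 means every dataset was updated: {}".format(status))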
Example #22
def run_authorized_view_tutorial():
    # Note to user: This is a group email for testing purposes. Replace with
    # your own group email address when running this code.
    analyst_group_email = '*****@*****.**'

    # [START bigquery_authorized_view_tutorial]
    # Create a source dataset
    # [START bigquery_avt_create_source_dataset]
    from google.cloud import bigquery

    client = bigquery.Client()
    source_dataset_id = 'github_source_data'

    source_dataset = bigquery.Dataset(client.dataset(source_dataset_id))
    # Specify the geographic location where the dataset should reside.
    source_dataset.location = 'US'
    source_dataset = client.create_dataset(source_dataset)  # API request
    # [END bigquery_avt_create_source_dataset]

    # Populate a source table
    # [START bigquery_avt_create_source_table]
    source_table_id = 'github_contributors'
    job_config = bigquery.QueryJobConfig()
    job_config.destination = source_dataset.table(source_table_id)
    sql = """
        SELECT commit, author, committer, repo_name
        FROM `bigquery-public-data.github_repos.commits`
        LIMIT 1000
    """
    query_job = client.query(
        sql,
        # Location must match that of the dataset(s) referenced in the query
        # and of the destination table.
        location='US',
        job_config=job_config)  # API request - starts the query

    query_job.result()  # Waits for the query to finish
    # [END bigquery_avt_create_source_table]

    # Create a separate dataset to store your view
    # [START bigquery_avt_create_shared_dataset]
    shared_dataset_id = 'shared_views'
    shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id))
    shared_dataset.location = 'US'
    shared_dataset = client.create_dataset(shared_dataset)  # API request
    # [END bigquery_avt_create_shared_dataset]

    # Create the view in the new dataset
    # [START bigquery_avt_create_view]
    shared_view_id = 'github_analyst_view'
    view = bigquery.Table(shared_dataset.table(shared_view_id))
    sql_template = """
        SELECT
            commit, author.name as author,
            committer.name as committer, repo_name
        FROM
            `{}.{}.{}`
    """
    view.view_query = sql_template.format(
        client.project, source_dataset_id, source_table_id)
    view = client.create_table(view)  # API request
    # [END bigquery_avt_create_view]

    # Assign access controls to the dataset containing the view
    # [START bigquery_avt_shared_dataset_access]
    # analyst_group_email = '*****@*****.**'
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry('READER', 'groupByEmail', analyst_group_email)
    )
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(
        shared_dataset, ['access_entries'])  # API request
    # [END bigquery_avt_shared_dataset_access]

    # Authorize the view to access the source dataset
    # [START bigquery_avt_source_dataset_access]
    access_entries = source_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry(None, 'view', view.reference.to_api_repr())
    )
    source_dataset.access_entries = access_entries
    source_dataset = client.update_dataset(
        source_dataset, ['access_entries'])  # API request
    # [END bigquery_avt_source_dataset_access]
    # [END bigquery_authorized_view_tutorial]
    return (source_dataset, shared_dataset)