Example #1
def test_encryption_configuration_setter(self):
    from google.cloud.bigquery.table import EncryptionConfiguration
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    table = self._make_one(table_ref)
    encryption_configuration = EncryptionConfiguration(
        kms_key_name=self.KMS_KEY_NAME)
    table.encryption_configuration = encryption_configuration
    self.assertEqual(table.encryption_configuration.kms_key_name,
                     self.KMS_KEY_NAME)
    table.encryption_configuration = None
    self.assertIsNone(table.encryption_configuration)
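The same property can be exercised outside a test class. A minimal sketch, assuming google-cloud-bigquery is installed; the project, dataset, and KMS key names are placeholders:

from google.cloud.bigquery import Table
from google.cloud.bigquery.table import EncryptionConfiguration

# Table accepts a fully qualified "project.dataset.table" ID string.
table = Table("my-project.my_dataset.my_table")
table.encryption_configuration = EncryptionConfiguration(
    kms_key_name="projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key")
assert table.encryption_configuration.kms_key_name.endswith("my-key")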
Example #2
from google.cloud.bigquery.table import EncryptionConfiguration, TimePartitioning


def _preprocess_config(cfg):
    # Replace raw string/dict config values with BigQuery client objects, in place.
    destination_encryption_configuration = cfg.get(
        'destination_encryption_configuration')
    time_partitioning = cfg.get('time_partitioning')

    if destination_encryption_configuration is not None:
        cfg['destination_encryption_configuration'] = EncryptionConfiguration(
            kms_key_name=destination_encryption_configuration)

    if time_partitioning is not None:
        cfg['time_partitioning'] = TimePartitioning(**time_partitioning)

    return cfg
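A quick usage sketch of _preprocess_config; the KMS key name and the time-partitioning mapping below are hypothetical inputs chosen to exercise both branches:

cfg = _preprocess_config({
    'destination_encryption_configuration':
        'projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key',
    'time_partitioning': {'type_': 'DAY', 'field': 'created_at'},
})
assert cfg['destination_encryption_configuration'].kms_key_name.endswith('my-key')
assert cfg['time_partitioning'].type_ == 'DAY'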
Example #3
import re

from google.cloud.bigquery import QueryJobConfig
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.job import (
    CreateDisposition,
    QueryPriority,
    SchemaUpdateOption,
    WriteDisposition,
)
from google.cloud.bigquery.table import EncryptionConfiguration, TableReference

# Module-level helpers assumed from the surrounding source; the exact
# delimiter patterns in the original module may differ.
GROUP_DELIMITER = re.compile(r"\s*,\s*")
KEY_VALUE_DELIMITER = re.compile(r"\s*:\s*")


def parse_boolean(str_value):
    # Accept "true"/"false" case-insensitively; raise ValueError otherwise,
    # matching how the callers below handle bad input.
    if str_value.lower() == "true":
        return True
    if str_value.lower() == "false":
        return False
    raise ValueError("invalid boolean value: " + str_value)


def parse_url(url):  # noqa: C901
    query = dict(url.query)  # need mutable query.

    # use_legacy_sql (legacy)
    if "use_legacy_sql" in query:
        raise ValueError("legacy sql is not supported by this dialect")
    # allow_large_results (legacy)
    if "allow_large_results" in query:
        raise ValueError(
            "allow_large_results is only allowed for legacy sql, which is not supported by this dialect"
        )
    # flatten_results (legacy)
    if "flatten_results" in query:
        raise ValueError(
            "flatten_results is only allowed for legacy sql, which is not supported by this dialect"
        )
    # maximum_billing_tier (deprecated)
    if "maximum_billing_tier" in query:
        raise ValueError("maximum_billing_tier is a deprecated argument")

    project_id = url.host
    location = None
    dataset_id = url.database or None
    arraysize = None
    credentials_path = None

    # location
    if "location" in query:
        location = query.pop("location")

    # credentials_path
    if "credentials_path" in query:
        credentials_path = query.pop("credentials_path")

    # arraysize
    if "arraysize" in query:
        str_arraysize = query.pop("arraysize")
        try:
            arraysize = int(str_arraysize)
        except ValueError:
            raise ValueError("invalid int in url query arraysize: " +
                             str_arraysize)

    # if only these "non-config" values were present, the dict will now be empty
    if not query:
        # if a dataset_id exists, we need to return a job_config that isn't None
        # so it can be updated with a dataset reference from the client
        if dataset_id:
            return (
                project_id,
                location,
                dataset_id,
                arraysize,
                credentials_path,
                QueryJobConfig(),
            )
        else:
            return project_id, location, dataset_id, arraysize, credentials_path, None

    job_config = QueryJobConfig()

    # clustering_fields list(str)
    if "clustering_fields" in query:
        clustering_fields = GROUP_DELIMITER.split(query["clustering_fields"])
        job_config.clustering_fields = list(clustering_fields)

    # create_disposition
    if "create_disposition" in query:
        create_disposition = query["create_disposition"]
        try:
            job_config.create_disposition = getattr(CreateDisposition,
                                                    create_disposition)
        except AttributeError:
            raise ValueError("invalid create_disposition in url query: " +
                             create_disposition)

    # default_dataset
    if "default_dataset" in query or "dataset_id" in query or "project_id" in query:
        raise ValueError(
            "don't pass default_dataset, dataset_id, project_id in url query, instead use the url host and database"
        )

    # destination
    if "destination" in query:
        dest_project = None
        dest_dataset = None
        dest_table = None

        try:
            dest_project, dest_dataset, dest_table = query[
                "destination"].split(".")
        except ValueError:
            raise ValueError(
                "url query destination parameter should be fully qualified with project, dataset, and table"
            )

        job_config.destination = TableReference(
            DatasetReference(dest_project, dest_dataset), dest_table)

    # destination_encryption_configuration
    if "destination_encryption_configuration" in query:
        job_config.destination_encryption_configuration = EncryptionConfiguration(
            query["destination_encryption_configuration"])

    # dry_run
    if "dry_run" in query:
        try:
            job_config.dry_run = parse_boolean(query["dry_run"])
        except ValueError:
            raise ValueError("invalid boolean in url query for dry_run: " +
                             query["dry_run"])

    # labels
    if "labels" in query:
        label_groups = GROUP_DELIMITER.split(query["labels"])
        labels = {}
        for label_group in label_groups:
            try:
                key, value = KEY_VALUE_DELIMITER.split(label_group)
            except ValueError:
                raise ValueError("malformed url query in labels: " +
                                 label_group)
            labels[key] = value

        job_config.labels = labels

    # maximum_bytes_billed
    if "maximum_bytes_billed" in query:
        try:
            job_config.maximum_bytes_billed = int(
                query["maximum_bytes_billed"])
        except ValueError:
            raise ValueError(
                "invalid int in url query maximum_bytes_billed: " +
                query["maximum_bytes_billed"])

    # priority
    if "priority" in query:
        try:
            job_config.priority = getattr(QueryPriority, query["priority"])
        except AttributeError:
            raise ValueError("invalid priority in url query: " +
                             query["priority"])

    # query_parameters
    if "query_parameters" in query:
        raise NotImplementedError("url query query_parameters not implemented")

    # schema_update_options
    if "schema_update_options" in query:
        schema_update_options = GROUP_DELIMITER.split(
            query["schema_update_options"])
        try:
            job_config.schema_update_options = [
                getattr(SchemaUpdateOption, schema_update_option)
                for schema_update_option in schema_update_options
            ]
        except AttributeError:
            raise ValueError("invalid schema_update_options in url query: " +
                             query["schema_update_options"])

    # table_definitions
    if "table_definitions" in query:
        raise NotImplementedError(
            "url query table_definitions not implemented")

    # time_partitioning
    if "time_partitioning" in query:
        raise NotImplementedError(
            "url query time_partitioning not implemented")

    # udf_resources
    if "udf_resources" in query:
        raise NotImplementedError("url query udf_resources not implemented")

    # use_query_cache
    if "use_query_cache" in query:
        try:
            job_config.use_query_cache = parse_boolean(
                query["use_query_cache"])
        except ValueError:
            raise ValueError(
                "invalid boolean in url query for use_query_cache: " +
                query["use_query_cache"])

    # write_disposition
    if "write_disposition" in query:
        try:
            job_config.write_disposition = getattr(WriteDisposition,
                                                   query["write_disposition"])
        except AttributeError:
            raise ValueError("invalid write_disposition in url query: " +
                             query["write_disposition"])

    return project_id, location, dataset_id, arraysize, credentials_path, job_config
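For reference, a sketch of driving parse_url from a SQLAlchemy URL; make_url is SQLAlchemy's real helper, while the connection string itself is illustrative:

from sqlalchemy.engine.url import make_url

url = make_url(
    "bigquery://some-project/some-dataset"
    "?location=some-location&arraysize=1000&credentials_path=/some/path/to.json")
project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(url)
assert project_id == "some-project" and arraysize == 1000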
Example #4
def test_basic(url_with_everything):  # test name assumed from context
    project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(url_with_everything)

    assert project_id == 'some-project'
    assert location == 'some-location'
    assert dataset_id == 'some-dataset'
    assert arraysize == 1000
    assert credentials_path == '/some/path/to.json'
    assert isinstance(job_config, QueryJobConfig)


Example #5
@pytest.mark.parametrize('param, value', [
    ('clustering_fields', ['a', 'b', 'c']),
    ('create_disposition', 'CREATE_IF_NEEDED'),
    ('destination', TableReference(DatasetReference('different-project', 'different-dataset'), 'table')),
    ('destination_encryption_configuration',
     lambda enc: enc.kms_key_name == EncryptionConfiguration('some-configuration').kms_key_name),
    ('dry_run', True),
    ('labels', {'a': 'b', 'c': 'd'}),
    ('maximum_bytes_billed', 1000),
    ('priority', 'INTERACTIVE'),
    ('schema_update_options', ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION']),
    ('use_query_cache', True),
    ('write_disposition', 'WRITE_APPEND'),
])
def test_all_values(url_with_everything, param, value):
    job_config = parse_url(url_with_everything)[5]

    config_value = getattr(job_config, param)
    if callable(value):
        assert value(config_value)
    else:
        assert config_value == value
Example #6
def test_basic(url_with_everything):  # test name and call assumed from context
    project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(url_with_everything)

    assert project_id == 'some-project'
    assert location == 'some-location'
    assert dataset_id == 'some-dataset'
    assert arraysize == 1000
    assert credentials_path == '/some/path/to.json'
    assert isinstance(job_config, QueryJobConfig)


@pytest.mark.parametrize('param, value', [
    ('clustering_fields', ['a', 'b', 'c']),
    ('create_disposition', 'CREATE_IF_NEEDED'),
    ('destination',
     TableReference(DatasetReference('different-project', 'different-dataset'),
                    'table')),
    ('destination_encryption_configuration', lambda enc: enc.kms_key_name ==
     EncryptionConfiguration('some-configuration').kms_key_name),
    ('dry_run', True),
    ('labels', {
        'a': 'b',
        'c': 'd'
    }),
    ('maximum_bytes_billed', 1000),
    ('priority', 'INTERACTIVE'),
    ('schema_update_options',
     ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION']),
    ('use_query_cache', True),
    ('write_disposition', 'WRITE_APPEND'),
])
def test_all_values(url_with_everything, param, value):
    job_config = parse_url(url_with_everything)[5]

    config_value = getattr(job_config, param)
    if callable(value):
        assert value(config_value)
    else:
        assert config_value == value
Example #7

@pytest.mark.parametrize(
    "param, value",
    [
        ("clustering_fields", ["a", "b", "c"]),
        ("create_disposition", "CREATE_IF_NEEDED"),
        (
            "destination",
            TableReference(
                DatasetReference("different-project", "different-dataset"),
                "table"),
        ),
        (
            "destination_encryption_configuration",
            lambda enc: enc.kms_key_name == EncryptionConfiguration(
                "some-configuration").kms_key_name,
        ),
        ("dry_run", True),
        ("labels", {
            "a": "b",
            "c": "d"
        }),
        ("maximum_bytes_billed", 1000),
        ("priority", "INTERACTIVE"),
        ("schema_update_options",
         ["ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION"]),
        ("use_query_cache", True),
        ("write_disposition", "WRITE_APPEND"),
    ],
)
def test_all_values(url_with_everything, param, value):
    job_config = parse_url(url_with_everything)[5]

    config_value = getattr(job_config, param)
    if callable(value):
        assert value(config_value)
    else:
        assert config_value == value