Example #1
    def create_tables_from_dict(self,
                                table_names_to_schemas,  # type: Dict[str, List[SchemaField]]
                                dataset_id=None,  # type: Optional[str]
                                replace_existing_tables=False,  # type: Optional[bool]
                                ):
        # type: (...) -> None
        """Creates a set of tables from a dictionary of table names to their schemas.

        Args:
          table_names_to_schemas: A dictionary of:
            key: The table name.
            value: A list of SchemaField objects.
          dataset_id: The dataset in which to create tables. If not specified, the default
              dataset is used.
          replace_existing_tables: If True, delete and re-create any tables that already
              exist. Otherwise, check whether any of the requested tables exist and raise
              a RuntimeError if they do.

        Raises:
            RuntimeError: If replace_existing_tables is False and any of the tables
                requested for creation already exist.
        """

        dataset_ref = DatasetReference(self.project_id,
                                       dataset_id if dataset_id else self.default_dataset_id)

        # If the flag isn't set to replace existing tables, raise an error if any tables we're
        # trying to create already exist.
        if not replace_existing_tables:
            self._raise_if_tables_exist(table_names_to_schemas.keys())

        for name, schema in table_names_to_schemas.items():
            table_ref = TableReference(dataset_ref, name)
            # Use the Table object so it retains its schema.
            table = bigquery.Table(table_ref, schema=schema)

            if self.table_exists(table) and replace_existing_tables:
                self.delete_table(table)
            self.create_table(table)
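A minimal usage sketch for the method above (hedged: bq_client stands in for an instance of the wrapper class that defines create_tables_from_dict; the dataset, table names, and fields are invented for illustration):

from google.cloud.bigquery import SchemaField

# Create two tables in a hypothetical "analytics" dataset, replacing any
# tables of the same names that already exist.
bq_client.create_tables_from_dict(
    {
        "users": [
            SchemaField("id", "INTEGER", mode="REQUIRED"),
            SchemaField("email", "STRING"),
        ],
        "events": [SchemaField("event_ts", "TIMESTAMP")],
    },
    dataset_id="analytics",
    replace_existing_tables=True,
)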
Example #2
def test_create_dataset_w_custom_property(client, PROJECT, DS_ID):
    # The library should handle sending properties to the API that are not
    # yet part of the library

    path = "/projects/%s/datasets" % PROJECT
    resource = {
        "datasetReference": {
            "projectId": PROJECT,
            "datasetId": DS_ID
        },
        "newAlphaProperty": "unreleased property",
    }
    conn = client._connection = make_connection(resource)

    ds_ref = DatasetReference(PROJECT, DS_ID)
    before = Dataset(ds_ref)
    before._properties["newAlphaProperty"] = "unreleased property"
    after = client.create_dataset(before)

    assert after.dataset_id == DS_ID
    assert after.project == PROJECT
    assert after._properties["newAlphaProperty"] == "unreleased property"

    conn.api_request.assert_called_once_with(
        method="POST",
        path=path,
        data={
            "datasetReference": {
                "projectId": PROJECT,
                "datasetId": DS_ID
            },
            "newAlphaProperty": "unreleased property",
            "labels": {},
        },
        timeout=DEFAULT_TIMEOUT,
    )
Example #3
 def test_labels_setter_bad_value(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     with self.assertRaises(ValueError):
         table.labels = 12345
Example #4
 def test_external_data_configuration_setter_bad_value(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     with self.assertRaises(ValueError):
         table.external_data_configuration = 12345
Example #5
 def test_view_use_legacy_sql_setter_bad_value(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     with self.assertRaises(ValueError):
         table.view_use_legacy_sql = 12345
Example #6
 def test_location_setter(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     table.location = 'LOCATION'
     self.assertEqual(table.location, 'LOCATION')
Example #7
class _Base(unittest.TestCase):
    from google.cloud.bigquery.dataset import DatasetReference
    from google.cloud.bigquery.table import TableReference

    ENDPOINT = "https://bigquery.googleapis.com"
    PROJECT = "project"
    SOURCE1 = "http://example.com/source1.csv"
    DS_ID = "dataset_id"
    DS_REF = DatasetReference(PROJECT, DS_ID)
    TABLE_ID = "table_id"
    TABLE_REF = TableReference(DS_REF, TABLE_ID)
    JOB_ID = "JOB_ID"
    JOB_TYPE = "unknown"
    KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"

    def _make_one(self, *args, **kw):
        return self._get_target_class()(*args, **kw)

    def _setUpConstants(self):
        import datetime
        from google.cloud._helpers import UTC

        self.WHEN_TS = 1437767599.006
        self.WHEN = datetime.datetime.utcfromtimestamp(
            self.WHEN_TS).replace(tzinfo=UTC)
        self.ETAG = "ETAG"
        self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID)
        self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format(
            self.ENDPOINT, self.PROJECT, self.JOB_ID)
        self.USER_EMAIL = "*****@*****.**"

    def _table_ref(self, table_id):
        from google.cloud.bigquery.table import TableReference

        return TableReference(self.DS_REF, table_id)

    def _make_resource(self, started=False, ended=False, location="US"):
        self._setUpConstants()
        return _make_job_resource(
            creation_time_ms=int(self.WHEN_TS * 1000),
            started_time_ms=int(self.WHEN_TS * 1000),
            ended_time_ms=int(self.WHEN_TS * 1000) + 1000000,
            started=started,
            ended=ended,
            etag=self.ETAG,
            endpoint=self.ENDPOINT,
            job_type=self.JOB_TYPE,
            job_id=self.JOB_ID,
            project_id=self.PROJECT,
            user_email=self.USER_EMAIL,
            location=location,
        )

    def _verifyInitialReadonlyProperties(self, job):
        # root elements of resource
        self.assertIsNone(job.etag)
        self.assertIsNone(job.self_link)
        self.assertIsNone(job.user_email)

        # derived from resource['statistics']
        self.assertIsNone(job.created)
        self.assertIsNone(job.started)
        self.assertIsNone(job.ended)

        # derived from resource['status']
        self.assertIsNone(job.error_result)
        self.assertIsNone(job.errors)
        self.assertIsNone(job.state)

    def _verifyReadonlyResourceProperties(self, job, resource):
        from datetime import timedelta

        statistics = resource.get("statistics", {})

        if "creationTime" in statistics:
            self.assertEqual(job.created, self.WHEN)
        else:
            self.assertIsNone(job.created)

        if "startTime" in statistics:
            self.assertEqual(job.started, self.WHEN)
        else:
            self.assertIsNone(job.started)

        if "endTime" in statistics:
            self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000))
        else:
            self.assertIsNone(job.ended)

        if "etag" in resource:
            self.assertEqual(job.etag, self.ETAG)
        else:
            self.assertIsNone(job.etag)

        if "selfLink" in resource:
            self.assertEqual(job.self_link, self.RESOURCE_URL)
        else:
            self.assertIsNone(job.self_link)

        if "user_email" in resource:
            self.assertEqual(job.user_email, self.USER_EMAIL)
        else:
            self.assertIsNone(job.user_email)
Example #8
class TestDataset(unittest.TestCase):
    from google.cloud.bigquery.dataset import DatasetReference

    PROJECT = 'project'
    DS_ID = 'dataset-id'
    DS_REF = DatasetReference(PROJECT, DS_ID)

    @staticmethod
    def _get_target_class():
        from google.cloud.bigquery.dataset import Dataset

        return Dataset

    def _make_one(self, *args, **kw):
        return self._get_target_class()(*args, **kw)

    def _setUpConstants(self):
        import datetime
        from google.cloud._helpers import UTC

        self.WHEN_TS = 1437767599.006
        self.WHEN = datetime.datetime.utcfromtimestamp(
            self.WHEN_TS).replace(tzinfo=UTC)
        self.ETAG = 'ETAG'
        self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID)
        self.RESOURCE_URL = 'http://example.com/path/to/resource'

    def _make_resource(self):
        self._setUpConstants()
        USER_EMAIL = '*****@*****.**'
        GROUP_EMAIL = '*****@*****.**'
        return {
            'creationTime': self.WHEN_TS * 1000,
            'datasetReference': {
                'projectId': self.PROJECT,
                'datasetId': self.DS_ID,
            },
            'etag': self.ETAG,
            'id': self.DS_FULL_ID,
            'lastModifiedTime': self.WHEN_TS * 1000,
            'location': 'US',
            'selfLink': self.RESOURCE_URL,
            'defaultTableExpirationMs': 3600,
            'access': [
                {'role': 'OWNER', 'userByEmail': USER_EMAIL},
                {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL},
                {'role': 'WRITER', 'specialGroup': 'projectWriters'},
                {'role': 'READER', 'specialGroup': 'projectReaders'},
            ],
        }

    def _verify_access_entry(self, access_entries, resource):
        r_entries = []
        for r_entry in resource['access']:
            role = r_entry.pop('role')
            for entity_type, entity_id in sorted(r_entry.items()):
                r_entries.append({
                    'role': role,
                    'entity_type': entity_type,
                    'entity_id': entity_id
                })

        self.assertEqual(len(access_entries), len(r_entries))
        for a_entry, r_entry in zip(access_entries, r_entries):
            self.assertEqual(a_entry.role, r_entry['role'])
            self.assertEqual(a_entry.entity_type, r_entry['entity_type'])
            self.assertEqual(a_entry.entity_id, r_entry['entity_id'])

    def _verify_readonly_resource_properties(self, dataset, resource):

        self.assertEqual(dataset.project, self.PROJECT)
        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.reference.project, self.PROJECT)
        self.assertEqual(dataset.reference.dataset_id, self.DS_ID)

        if 'creationTime' in resource:
            self.assertEqual(dataset.created, self.WHEN)
        else:
            self.assertIsNone(dataset.created)
        if 'etag' in resource:
            self.assertEqual(dataset.etag, self.ETAG)
        else:
            self.assertIsNone(dataset.etag)
        if 'lastModifiedTime' in resource:
            self.assertEqual(dataset.modified, self.WHEN)
        else:
            self.assertIsNone(dataset.modified)
        if 'selfLink' in resource:
            self.assertEqual(dataset.self_link, self.RESOURCE_URL)
        else:
            self.assertIsNone(dataset.self_link)

    def _verify_resource_properties(self, dataset, resource):

        self._verify_readonly_resource_properties(dataset, resource)

        if 'defaultTableExpirationMs' in resource:
            self.assertEqual(dataset.default_table_expiration_ms,
                             int(resource.get('defaultTableExpirationMs')))
        else:
            self.assertIsNone(dataset.default_table_expiration_ms)
        self.assertEqual(dataset.description, resource.get('description'))
        self.assertEqual(dataset.friendly_name, resource.get('friendlyName'))
        self.assertEqual(dataset.location, resource.get('location'))

        if 'access' in resource:
            self._verify_access_entry(dataset.access_entries, resource)
        else:
            self.assertEqual(dataset.access_entries, [])

    def test_ctor_defaults(self):
        dataset = self._make_one(self.DS_REF)
        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.project, self.PROJECT)
        self.assertEqual(
            dataset.path,
            '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID))
        self.assertEqual(dataset.access_entries, [])

        self.assertIsNone(dataset.created)
        self.assertIsNone(dataset.full_dataset_id)
        self.assertIsNone(dataset.etag)
        self.assertIsNone(dataset.modified)
        self.assertIsNone(dataset.self_link)

        self.assertIsNone(dataset.default_table_expiration_ms)
        self.assertIsNone(dataset.description)
        self.assertIsNone(dataset.friendly_name)
        self.assertIsNone(dataset.location)

    def test_ctor_explicit(self):
        from google.cloud.bigquery.dataset import DatasetReference, AccessEntry

        phred = AccessEntry('OWNER', 'userByEmail', '*****@*****.**')
        bharney = AccessEntry('OWNER', 'userByEmail', '*****@*****.**')
        entries = [phred, bharney]
        OTHER_PROJECT = 'foo-bar-123'
        dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID))
        dataset.access_entries = entries
        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.project, OTHER_PROJECT)
        self.assertEqual(
            dataset.path,
            '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID))
        self.assertEqual(dataset.access_entries, entries)

        self.assertIsNone(dataset.created)
        self.assertIsNone(dataset.full_dataset_id)
        self.assertIsNone(dataset.etag)
        self.assertIsNone(dataset.modified)
        self.assertIsNone(dataset.self_link)

        self.assertIsNone(dataset.default_table_expiration_ms)
        self.assertIsNone(dataset.description)
        self.assertIsNone(dataset.friendly_name)
        self.assertIsNone(dataset.location)

    def test_access_entries_setter_non_list(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(TypeError):
            dataset.access_entries = object()

    def test_access_entries_setter_invalid_field(self):
        from google.cloud.bigquery.dataset import AccessEntry

        dataset = self._make_one(self.DS_REF)
        phred = AccessEntry('OWNER', 'userByEmail', '*****@*****.**')
        with self.assertRaises(ValueError):
            dataset.access_entries = [phred, object()]

    def test_access_entries_setter(self):
        from google.cloud.bigquery.dataset import AccessEntry

        dataset = self._make_one(self.DS_REF)
        phred = AccessEntry('OWNER', 'userByEmail', '*****@*****.**')
        bharney = AccessEntry('OWNER', 'userByEmail', '*****@*****.**')
        dataset.access_entries = [phred, bharney]
        self.assertEqual(dataset.access_entries, [phred, bharney])

    def test_default_table_expiration_ms_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.default_table_expiration_ms = 'bogus'

    def test_default_table_expiration_ms_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.default_table_expiration_ms = 12345
        self.assertEqual(dataset.default_table_expiration_ms, 12345)

    def test_description_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.description = 12345

    def test_description_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.description = 'DESCRIPTION'
        self.assertEqual(dataset.description, 'DESCRIPTION')

    def test_friendly_name_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.friendly_name = 12345

    def test_friendly_name_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.friendly_name = 'FRIENDLY'
        self.assertEqual(dataset.friendly_name, 'FRIENDLY')

    def test_location_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.location = 12345

    def test_location_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.location = 'LOCATION'
        self.assertEqual(dataset.location, 'LOCATION')

    def test_labels_update_in_place(self):
        dataset = self._make_one(self.DS_REF)
        del dataset._properties['labels']  # don't start w/ existing dict
        labels = dataset.labels
        labels['foo'] = 'bar'  # update in place
        self.assertEqual(dataset.labels, {'foo': 'bar'})

    def test_labels_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.labels = {'color': 'green'}
        self.assertEqual(dataset.labels, {'color': 'green'})

    def test_labels_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.labels = None

    def test_labels_getter_missing_value(self):
        dataset = self._make_one(self.DS_REF)
        self.assertEqual(dataset.labels, {})

    def test_from_api_repr_missing_identity(self):
        self._setUpConstants()
        RESOURCE = {}
        klass = self._get_target_class()
        with self.assertRaises(KeyError):
            klass.from_api_repr(RESOURCE)

    def test_from_api_repr_bare(self):
        self._setUpConstants()
        RESOURCE = {
            'id': '%s:%s' % (self.PROJECT, self.DS_ID),
            'datasetReference': {
                'projectId': self.PROJECT,
                'datasetId': self.DS_ID,
            }
        }
        klass = self._get_target_class()
        dataset = klass.from_api_repr(RESOURCE)
        self._verify_resource_properties(dataset, RESOURCE)

    def test_from_api_repr_w_properties(self):
        RESOURCE = self._make_resource()
        klass = self._get_target_class()
        dataset = klass.from_api_repr(RESOURCE)
        self._verify_resource_properties(dataset, RESOURCE)

    def test_to_api_repr_w_custom_field(self):
        dataset = self._make_one(self.DS_REF)
        dataset._properties['newAlphaProperty'] = 'unreleased property'
        resource = dataset.to_api_repr()

        exp_resource = {
            'datasetReference': self.DS_REF.to_api_repr(),
            'labels': {},
            'newAlphaProperty': 'unreleased property',
        }
        self.assertEqual(resource, exp_resource)

    def test_from_string(self):
        cls = self._get_target_class()
        got = cls.from_string('string-project.string_dataset')
        self.assertEqual(got.project, 'string-project')
        self.assertEqual(got.dataset_id, 'string_dataset')

    def test_from_string_legacy_string(self):
        cls = self._get_target_class()
        with self.assertRaises(ValueError):
            cls.from_string('string-project:string_dataset')

    def test__build_resource_w_custom_field(self):
        dataset = self._make_one(self.DS_REF)
        dataset._properties['newAlphaProperty'] = 'unreleased property'
        resource = dataset._build_resource(['newAlphaProperty'])

        exp_resource = {'newAlphaProperty': 'unreleased property'}
        self.assertEqual(resource, exp_resource)

    def test__build_resource_w_custom_field_not_in__properties(self):
        dataset = self._make_one(self.DS_REF)
        dataset.bad = 'value'
        with self.assertRaises(ValueError):
            dataset._build_resource(['bad'])

    def test_table(self):
        from google.cloud.bigquery.table import TableReference

        dataset = self._make_one(self.DS_REF)
        table = dataset.table('table_id')
        self.assertIsInstance(table, TableReference)
        self.assertEqual(table.table_id, 'table_id')
        self.assertEqual(table.dataset_id, self.DS_ID)
        self.assertEqual(table.project, self.PROJECT)

    def test___repr__(self):
        from google.cloud.bigquery.dataset import DatasetReference
        dataset = self._make_one(DatasetReference('project1', 'dataset1'))
        expected = "Dataset(DatasetReference('project1', 'dataset1'))"
        self.assertEqual(repr(dataset), expected)
Example #9
 def test___repr__(self):
     dataset = DatasetReference('project1', 'dataset1')
     table1 = self._make_one(dataset, 'table1')
     expected = "TableReference('project1', 'dataset1', 'table1')"
     self.assertEqual(repr(table1), expected)
Example #10
 def test___eq___equality(self):
     from google.cloud.bigquery.dataset import DatasetReference
     dataset = DatasetReference('project_1', 'dataset_1')
     table = self._make_one(dataset, 'table_1')
     other = self._make_one(dataset, 'table_1')
     self.assertEqual(table, other)
Example #11
def parse_url(url):  # noqa: C901
    query = dict(url.query)  # need mutable query.

    # use_legacy_sql (legacy)
    if "use_legacy_sql" in query:
        raise ValueError("legacy sql is not supported by this dialect")
    # allow_large_results (legacy)
    if "allow_large_results" in query:
        raise ValueError(
            "allow_large_results is only allowed for legacy sql, which is not supported by this dialect"
        )
    # flatten_results (legacy)
    if "flatten_results" in query:
        raise ValueError(
            "flatten_results is only allowed for legacy sql, which is not supported by this dialect"
        )
    # maximum_billing_tier (deprecated)
    if "maximum_billing_tier" in query:
        raise ValueError("maximum_billing_tier is a deprecated argument")

    project_id = url.host
    location = None
    dataset_id = url.database or None
    arraysize = None
    credentials_path = None

    # location
    if "location" in query:
        location = query.pop("location")

    # credentials_path
    if "credentials_path" in query:
        credentials_path = query.pop("credentials_path")

    # arraysize
    if "arraysize" in query:
        str_arraysize = query.pop("arraysize")
        try:
            arraysize = int(str_arraysize)
        except ValueError:
            raise ValueError("invalid int in url query arraysize: " +
                             str_arraysize)

    # if only these "non-config" values were present, the dict will now be empty
    if not query:
        # if a dataset_id exists, we need to return a job_config that isn't None
        # so it can be updated with a dataset reference from the client
        if dataset_id:
            return (
                project_id,
                location,
                dataset_id,
                arraysize,
                credentials_path,
                QueryJobConfig(),
            )
        else:
            return project_id, location, dataset_id, arraysize, credentials_path, None

    job_config = QueryJobConfig()

    # clustering_fields list(str)
    if "clustering_fields" in query:
        clustering_fields = GROUP_DELIMITER.split(query["clustering_fields"])
        job_config.clustering_fields = list(clustering_fields)

    # create_disposition
    if "create_disposition" in query:
        create_disposition = query["create_disposition"]
        try:
            job_config.create_disposition = getattr(CreateDisposition,
                                                    create_disposition)
        except AttributeError:
            raise ValueError("invalid create_disposition in url query: " +
                             create_disposition)

    # default_dataset
    if "default_dataset" in query or "dataset_id" in query or "project_id" in query:
        raise ValueError(
            "don't pass default_dataset, dataset_id, project_id in url query, instead use the url host and database"
        )

    # destination
    if "destination" in query:
        dest_project = None
        dest_dataset = None
        dest_table = None

        try:
            dest_project, dest_dataset, dest_table = query[
                "destination"].split(".")
        except ValueError:
            raise ValueError(
                "url query destination parameter should be fully qualified with project, dataset, and table"
            )

        job_config.destination = TableReference(
            DatasetReference(dest_project, dest_dataset), dest_table)

    # destination_encryption_configuration
    if "destination_encryption_configuration" in query:
        job_config.destination_encryption_configuration = EncryptionConfiguration(
            query["destination_encryption_configuration"])

    # dry_run
    if "dry_run" in query:
        try:
            job_config.dry_run = parse_boolean(query["dry_run"])
        except ValueError:
            raise ValueError("invalid boolean in url query for dry_run: " +
                             query["dry_run"])

    # labels
    if "labels" in query:
        label_groups = GROUP_DELIMITER.split(query["labels"])
        labels = {}
        for label_group in label_groups:
            try:
                key, value = KEY_VALUE_DELIMITER.split(label_group)
            except ValueError:
                raise ValueError("malformed url query in labels: " +
                                 label_group)
            labels[key] = value

        job_config.labels = labels

    # maximum_bytes_billed
    if "maximum_bytes_billed" in query:
        try:
            job_config.maximum_bytes_billed = int(
                query["maximum_bytes_billed"])
        except ValueError:
            raise ValueError(
                "invalid int in url query maximum_bytes_billed: " +
                query["maximum_bytes_billed"])

    # priority
    if "priority" in query:
        try:
            job_config.priority = getattr(QueryPriority, query["priority"])
        except AttributeError:
            raise ValueError("invalid priority in url query: " +
                             query["priority"])

    # query_parameters
    if "query_parameters" in query:
        raise NotImplementedError("url query query_parameters not implemented")

    # schema_update_options
    if "schema_update_options" in query:
        schema_update_options = GROUP_DELIMITER.split(
            query["schema_update_options"])
        try:
            job_config.schema_update_options = [
                getattr(SchemaUpdateOption, schema_update_option)
                for schema_update_option in schema_update_options
            ]
        except AttributeError:
            raise ValueError("invalid schema_update_options in url query: " +
                             query["schema_update_options"])

    # table_definitions
    if "table_definitions" in query:
        raise NotImplementedError(
            "url query table_definitions not implemented")

    # time_partitioning
    if "time_partitioning" in query:
        raise NotImplementedError(
            "url query time_partitioning not implemented")

    # udf_resources
    if "udf_resources" in query:
        raise NotImplementedError("url query udf_resources not implemented")

    # use_query_cache
    if "use_query_cache" in query:
        try:
            job_config.use_query_cache = parse_boolean(
                query["use_query_cache"])
        except ValueError:
            raise ValueError(
                "invalid boolean in url query for use_query_cache: " +
                query["use_query_cache"])

    # write_disposition
    if "write_disposition" in query:
        try:
            job_config.write_disposition = getattr(WriteDisposition,
                                                   query["write_disposition"])
        except AttributeError:
            raise ValueError("invalid write_disposition in url query: " +
                             query["write_disposition"])

    return project_id, location, dataset_id, arraysize, credentials_path, job_config
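For reference, a sketch of the connection-URL shape parse_url consumes (the host maps to the BigQuery project, the database path to the default dataset, and the query string carries the job options handled above; the values mirror the test fixtures below):

from sqlalchemy.engine.url import make_url

url = make_url(
    "bigquery://some-project/some-dataset"
    "?location=some-location&arraysize=1000"
    "&credentials_path=/some/path/to.json"
    "&priority=INTERACTIVE"
)
project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(url)
# job_config is a QueryJobConfig whose priority is QueryPriority.INTERACTIVE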
Example #12
def test_basic(url_with_everything):
    project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(url_with_everything)

    assert project_id == 'some-project'
    assert location == 'some-location'
    assert dataset_id == 'some-dataset'
    assert arraysize == 1000
    assert credentials_path == '/some/path/to.json'
    assert isinstance(job_config, QueryJobConfig)


@pytest.mark.parametrize('param, value', [
    ('clustering_fields', ['a', 'b', 'c']),
    ('create_disposition', 'CREATE_IF_NEEDED'),
    ('destination', TableReference(DatasetReference('different-project', 'different-dataset'), 'table')),
    ('destination_encryption_configuration',
     lambda enc: enc.kms_key_name == EncryptionConfiguration('some-configuration').kms_key_name),
    ('dry_run', True),
    ('labels', {'a': 'b', 'c': 'd'}),
    ('maximum_bytes_billed', 1000),
    ('priority', 'INTERACTIVE'),
    ('schema_update_options', ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION']),
    ('use_query_cache', True),
    ('write_disposition', 'WRITE_APPEND'),
])
def test_all_values(url_with_everything, param, value):
    job_config = parse_url(url_with_everything)[5]

    config_value = getattr(job_config, param)
    if callable(value):
        assert value(config_value)
    else:
        assert config_value == value
Example #13
def test_basic(url_with_everything):
    project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(
        url_with_everything)

    assert project_id == 'some-project'
    assert location == 'some-location'
    assert dataset_id == 'some-dataset'
    assert arraysize == 1000
    assert credentials_path == '/some/path/to.json'
    assert isinstance(job_config, QueryJobConfig)


@pytest.mark.parametrize('param, value', [
    ('clustering_fields', ['a', 'b', 'c']),
    ('create_disposition', 'CREATE_IF_NEEDED'),
    ('destination',
     TableReference(DatasetReference('different-project', 'different-dataset'),
                    'table')),
    ('destination_encryption_configuration', lambda enc: enc.kms_key_name ==
     EncryptionConfiguration('some-configuration').kms_key_name),
    ('dry_run', True),
    ('labels', {
        'a': 'b',
        'c': 'd'
    }),
    ('maximum_bytes_billed', 1000),
    ('priority', 'INTERACTIVE'),
    ('schema_update_options',
     ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION']),
    ('use_query_cache', True),
    ('write_disposition', 'WRITE_APPEND'),
])
def test_all_values(url_with_everything, param, value):
    job_config = parse_url(url_with_everything)[5]

    config_value = getattr(job_config, param)
    if callable(value):
        assert value(config_value)
    else:
        assert config_value == value
Example #14
 def __repr__(self):
     from google.cloud.bigquery.dataset import DatasetReference
     dataset_ref = DatasetReference(self._project, self._dataset_id)
     return "TableReference({}, '{}')".format(repr(dataset_ref),
                                              self._table_id)
Example #15
 def test_schema_setter_non_list(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     with self.assertRaises(TypeError):
         table.schema = object()
Example #16
def test_basic(url_with_everything):
    project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(
        url_with_everything
    )

    assert project_id == "some-project"
    assert location == "some-location"
    assert dataset_id == "some-dataset"
    assert arraysize == 1000
    assert credentials_path == "/some/path/to.json"
    assert isinstance(job_config, QueryJobConfig)


@pytest.mark.parametrize(
    "param, value, default",
    [
        ("clustering_fields", ["a", "b", "c"], None),
        ("create_disposition", "CREATE_IF_NEEDED", None),
        (
            "destination",
            TableReference(
                DatasetReference("different-project", "different-dataset"), "table"
            ),
            None,
        ),
        (
            "destination_encryption_configuration",
            lambda enc: enc.kms_key_name
            == EncryptionConfiguration("some-configuration").kms_key_name,
            None,
        ),
        ("dry_run", True, None),
        ("labels", {"a": "b", "c": "d"}, {}),
        ("maximum_bytes_billed", 1000, None),
        ("priority", "INTERACTIVE", None),
        (
            "schema_update_options",
Example #17
 def test_description_setter(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     table.description = 'DESCRIPTION'
     self.assertEqual(table.description, 'DESCRIPTION')
Example #18
 def test___repr__(self):
     from google.cloud.bigquery.dataset import DatasetReference
     dataset = self._make_one(DatasetReference('project1', 'dataset1'))
     expected = "Dataset(DatasetReference('project1', 'dataset1'))"
     self.assertEqual(repr(dataset), expected)
Example #19
 def test_friendly_name_setter(self):
     dataset = DatasetReference(self.PROJECT, self.DS_ID)
     table_ref = dataset.table(self.TABLE_NAME)
     table = self._make_one(table_ref)
     table.friendly_name = 'FRIENDLY'
     self.assertEqual(table.friendly_name, 'FRIENDLY')
Example #20
def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
    from google.cloud.bigquery.dataset import AccessEntry

    PATH = "projects/%s/datasets" % PROJECT
    DESCRIPTION = "DESC"
    FRIENDLY_NAME = "FN"
    LOCATION = "US"
    USER_EMAIL = "*****@*****.**"
    LABELS = {"color": "red"}
    VIEW = {
        "projectId": "my-proj",
        "datasetId": "starry-skies",
        "tableId": "northern-hemisphere",
    }
    RESOURCE = {
        "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
        "etag": "etag",
        "id": "%s:%s" % (PROJECT, DS_ID),
        "description": DESCRIPTION,
        "friendlyName": FRIENDLY_NAME,
        "location": LOCATION,
        "defaultTableExpirationMs": "3600",
        "labels": LABELS,
        "access": [
            {"role": "OWNER", "userByEmail": USER_EMAIL},
            {"view": VIEW},
        ],
    }
    conn = client._connection = make_connection(RESOURCE)
    entries = [
        AccessEntry("OWNER", "userByEmail", USER_EMAIL),
        AccessEntry(None, "view", VIEW),
    ]

    ds_ref = DatasetReference(PROJECT, DS_ID)
    before = Dataset(ds_ref)
    before.access_entries = entries
    before.description = DESCRIPTION
    before.friendly_name = FRIENDLY_NAME
    before.default_table_expiration_ms = 3600
    before.location = LOCATION
    before.labels = LABELS
    after = client.create_dataset(before)

    assert after.dataset_id == DS_ID
    assert after.project == PROJECT
    assert after.etag == RESOURCE["etag"]
    assert after.full_dataset_id == RESOURCE["id"]
    assert after.description == DESCRIPTION
    assert after.friendly_name == FRIENDLY_NAME
    assert after.location == LOCATION
    assert after.default_table_expiration_ms == 3600
    assert after.labels == LABELS

    conn.api_request.assert_called_once_with(
        method="POST",
        path="/%s" % PATH,
        data={
            "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
            "description": DESCRIPTION,
            "friendlyName": FRIENDLY_NAME,
            "location": LOCATION,
            "defaultTableExpirationMs": "3600",
            "access": [
                {"role": "OWNER", "userByEmail": USER_EMAIL},
                {"view": VIEW},
            ],
            "labels": LABELS,
        },
        timeout=DEFAULT_TIMEOUT,
    )
Example #21
class TestDataset(unittest.TestCase):
    from google.cloud.bigquery.dataset import DatasetReference

    PROJECT = "project"
    DS_ID = "dataset-id"
    DS_REF = DatasetReference(PROJECT, DS_ID)
    KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"

    @staticmethod
    def _get_target_class():
        from google.cloud.bigquery.dataset import Dataset

        return Dataset

    def _make_one(self, *args, **kw):
        return self._get_target_class()(*args, **kw)

    def _setUpConstants(self):
        import datetime
        from google.cloud._helpers import UTC

        self.WHEN_TS = 1437767599.006
        self.WHEN = datetime.datetime.utcfromtimestamp(
            self.WHEN_TS).replace(tzinfo=UTC)
        self.ETAG = "ETAG"
        self.DS_FULL_ID = "%s:%s" % (self.PROJECT, self.DS_ID)
        self.RESOURCE_URL = "http://example.com/path/to/resource"

    def _make_resource(self):
        self._setUpConstants()
        USER_EMAIL = "*****@*****.**"
        GROUP_EMAIL = "*****@*****.**"
        return {
            "creationTime": self.WHEN_TS * 1000,
            "datasetReference": {
                "projectId": self.PROJECT,
                "datasetId": self.DS_ID,
            },
            "etag": self.ETAG,
            "id": self.DS_FULL_ID,
            "lastModifiedTime": self.WHEN_TS * 1000,
            "location": "US",
            "selfLink": self.RESOURCE_URL,
            "defaultTableExpirationMs": 3600,
            "access": [
                {"role": "OWNER", "userByEmail": USER_EMAIL},
                {"role": "OWNER", "groupByEmail": GROUP_EMAIL},
                {"role": "WRITER", "specialGroup": "projectWriters"},
                {"role": "READER", "specialGroup": "projectReaders"},
            ],
            "defaultEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME},
        }

    def _verify_access_entry(self, access_entries, resource):
        r_entries = []
        for r_entry in resource["access"]:
            role = r_entry.pop("role")
            for entity_type, entity_id in sorted(r_entry.items()):
                r_entries.append({
                    "role": role,
                    "entity_type": entity_type,
                    "entity_id": entity_id
                })

        self.assertEqual(len(access_entries), len(r_entries))
        for a_entry, r_entry in zip(access_entries, r_entries):
            self.assertEqual(a_entry.role, r_entry["role"])
            self.assertEqual(a_entry.entity_type, r_entry["entity_type"])
            self.assertEqual(a_entry.entity_id, r_entry["entity_id"])

    def _verify_readonly_resource_properties(self, dataset, resource):

        self.assertEqual(dataset.project, self.PROJECT)
        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.reference.project, self.PROJECT)
        self.assertEqual(dataset.reference.dataset_id, self.DS_ID)

        if "creationTime" in resource:
            self.assertEqual(dataset.created, self.WHEN)
        else:
            self.assertIsNone(dataset.created)
        if "etag" in resource:
            self.assertEqual(dataset.etag, self.ETAG)
        else:
            self.assertIsNone(dataset.etag)
        if "lastModifiedTime" in resource:
            self.assertEqual(dataset.modified, self.WHEN)
        else:
            self.assertIsNone(dataset.modified)
        if "selfLink" in resource:
            self.assertEqual(dataset.self_link, self.RESOURCE_URL)
        else:
            self.assertIsNone(dataset.self_link)

    def _verify_resource_properties(self, dataset, resource):

        self._verify_readonly_resource_properties(dataset, resource)

        if "defaultTableExpirationMs" in resource:
            self.assertEqual(
                dataset.default_table_expiration_ms,
                int(resource.get("defaultTableExpirationMs")),
            )
        else:
            self.assertIsNone(dataset.default_table_expiration_ms)
        self.assertEqual(dataset.description, resource.get("description"))
        self.assertEqual(dataset.friendly_name, resource.get("friendlyName"))
        self.assertEqual(dataset.location, resource.get("location"))
        if "defaultEncryptionConfiguration" in resource:
            self.assertEqual(
                dataset.default_encryption_configuration.kms_key_name,
                resource.get("defaultEncryptionConfiguration")["kmsKeyName"],
            )
        else:
            self.assertIsNone(dataset.default_encryption_configuration)

        if "access" in resource:
            self._verify_access_entry(dataset.access_entries, resource)
        else:
            self.assertEqual(dataset.access_entries, [])

    def test_ctor_defaults(self):
        dataset = self._make_one(self.DS_REF)
        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.project, self.PROJECT)
        self.assertEqual(
            dataset.path,
            "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID))
        self.assertEqual(dataset.access_entries, [])

        self.assertIsNone(dataset.created)
        self.assertIsNone(dataset.full_dataset_id)
        self.assertIsNone(dataset.etag)
        self.assertIsNone(dataset.modified)
        self.assertIsNone(dataset.self_link)

        self.assertIsNone(dataset.default_table_expiration_ms)
        self.assertIsNone(dataset.description)
        self.assertIsNone(dataset.friendly_name)
        self.assertIsNone(dataset.location)

    def test_ctor_string(self):
        dataset = self._make_one("some-project.some_dset")
        self.assertEqual(dataset.project, "some-project")
        self.assertEqual(dataset.dataset_id, "some_dset")

    def test_ctor_string_wo_project_id(self):
        with pytest.raises(ValueError):
            # Project ID is missing.
            self._make_one("some_dset")

    def test_ctor_explicit(self):
        from google.cloud.bigquery.dataset import DatasetReference, AccessEntry

        phred = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
        bharney = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
        entries = [phred, bharney]
        OTHER_PROJECT = "foo-bar-123"
        dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID))
        dataset.access_entries = entries
        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.project, OTHER_PROJECT)
        self.assertEqual(
            dataset.path,
            "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID))
        self.assertEqual(dataset.access_entries, entries)

        self.assertIsNone(dataset.created)
        self.assertIsNone(dataset.full_dataset_id)
        self.assertIsNone(dataset.etag)
        self.assertIsNone(dataset.modified)
        self.assertIsNone(dataset.self_link)

        self.assertIsNone(dataset.default_table_expiration_ms)
        self.assertIsNone(dataset.description)
        self.assertIsNone(dataset.friendly_name)
        self.assertIsNone(dataset.location)

    def test_access_entries_setter_non_list(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(TypeError):
            dataset.access_entries = object()

    def test_access_entries_setter_invalid_field(self):
        from google.cloud.bigquery.dataset import AccessEntry

        dataset = self._make_one(self.DS_REF)
        phred = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
        with self.assertRaises(ValueError):
            dataset.access_entries = [phred, object()]

    def test_access_entries_setter(self):
        from google.cloud.bigquery.dataset import AccessEntry

        dataset = self._make_one(self.DS_REF)
        phred = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
        bharney = AccessEntry("OWNER", "userByEmail", "*****@*****.**")
        dataset.access_entries = [phred, bharney]
        self.assertEqual(dataset.access_entries, [phred, bharney])

    def test_default_partition_expiration_ms(self):
        dataset = self._make_one("proj.dset")
        assert dataset.default_partition_expiration_ms is None
        dataset.default_partition_expiration_ms = 12345
        assert dataset.default_partition_expiration_ms == 12345
        dataset.default_partition_expiration_ms = None
        assert dataset.default_partition_expiration_ms is None

    def test_default_table_expiration_ms_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.default_table_expiration_ms = "bogus"

    def test_default_table_expiration_ms_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.default_table_expiration_ms = 12345
        self.assertEqual(dataset.default_table_expiration_ms, 12345)

    def test_description_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.description = 12345

    def test_description_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.description = "DESCRIPTION"
        self.assertEqual(dataset.description, "DESCRIPTION")

    def test_friendly_name_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.friendly_name = 12345

    def test_friendly_name_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.friendly_name = "FRIENDLY"
        self.assertEqual(dataset.friendly_name, "FRIENDLY")

    def test_location_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.location = 12345

    def test_location_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.location = "LOCATION"
        self.assertEqual(dataset.location, "LOCATION")

    def test_labels_update_in_place(self):
        dataset = self._make_one(self.DS_REF)
        del dataset._properties["labels"]  # don't start w/ existing dict
        labels = dataset.labels
        labels["foo"] = "bar"  # update in place
        self.assertEqual(dataset.labels, {"foo": "bar"})

    def test_labels_setter(self):
        dataset = self._make_one(self.DS_REF)
        dataset.labels = {"color": "green"}
        self.assertEqual(dataset.labels, {"color": "green"})

    def test_labels_setter_bad_value(self):
        dataset = self._make_one(self.DS_REF)
        with self.assertRaises(ValueError):
            dataset.labels = None

    def test_labels_getter_missing_value(self):
        dataset = self._make_one(self.DS_REF)
        self.assertEqual(dataset.labels, {})

    def test_from_api_repr_missing_identity(self):
        self._setUpConstants()
        RESOURCE = {}
        klass = self._get_target_class()
        with self.assertRaises(KeyError):
            klass.from_api_repr(RESOURCE)

    def test_from_api_repr_bare(self):
        self._setUpConstants()
        RESOURCE = {
            "id": "%s:%s" % (self.PROJECT, self.DS_ID),
            "datasetReference": {
                "projectId": self.PROJECT,
                "datasetId": self.DS_ID
            },
        }
        klass = self._get_target_class()
        dataset = klass.from_api_repr(RESOURCE)
        self._verify_resource_properties(dataset, RESOURCE)

    def test_from_api_repr_w_properties(self):
        RESOURCE = self._make_resource()
        klass = self._get_target_class()
        dataset = klass.from_api_repr(RESOURCE)
        self._verify_resource_properties(dataset, RESOURCE)

    def test_to_api_repr_w_custom_field(self):
        dataset = self._make_one(self.DS_REF)
        dataset._properties["newAlphaProperty"] = "unreleased property"
        resource = dataset.to_api_repr()

        exp_resource = {
            "datasetReference": self.DS_REF.to_api_repr(),
            "labels": {},
            "newAlphaProperty": "unreleased property",
        }
        self.assertEqual(resource, exp_resource)

    def test_default_encryption_configuration_setter(self):
        from google.cloud.bigquery.encryption_configuration import (
            EncryptionConfiguration,
        )

        dataset = self._make_one(self.DS_REF)
        encryption_configuration = EncryptionConfiguration(
            kms_key_name=self.KMS_KEY_NAME)
        dataset.default_encryption_configuration = encryption_configuration
        self.assertEqual(dataset.default_encryption_configuration.kms_key_name,
                         self.KMS_KEY_NAME)
        dataset.default_encryption_configuration = None
        self.assertIsNone(dataset.default_encryption_configuration)

    def test_from_string(self):
        cls = self._get_target_class()
        got = cls.from_string("string-project.string_dataset")
        self.assertEqual(got.project, "string-project")
        self.assertEqual(got.dataset_id, "string_dataset")

    def test_from_string_legacy_string(self):
        cls = self._get_target_class()
        with self.assertRaises(ValueError):
            cls.from_string("string-project:string_dataset")

    def test__build_resource_w_custom_field(self):
        dataset = self._make_one(self.DS_REF)
        dataset._properties["newAlphaProperty"] = "unreleased property"
        resource = dataset._build_resource(["newAlphaProperty"])

        exp_resource = {"newAlphaProperty": "unreleased property"}
        self.assertEqual(resource, exp_resource)

    def test__build_resource_w_custom_field_not_in__properties(self):
        dataset = self._make_one(self.DS_REF)
        dataset.bad = "value"
        with self.assertRaises(ValueError):
            dataset._build_resource(["bad"])

    def test_table(self):
        from google.cloud.bigquery.table import TableReference

        dataset = self._make_one(self.DS_REF)
        table = dataset.table("table_id")
        self.assertIsInstance(table, TableReference)
        self.assertEqual(table.table_id, "table_id")
        self.assertEqual(table.dataset_id, self.DS_ID)
        self.assertEqual(table.project, self.PROJECT)

    def test___repr__(self):
        from google.cloud.bigquery.dataset import DatasetReference

        dataset = self._make_one(DatasetReference("project1", "dataset1"))
        expected = "Dataset(DatasetReference('project1', 'dataset1'))"
        self.assertEqual(repr(dataset), expected)