def test__build_protobuf_all_values(self):
    """Build a query protobuf with limit, offset and both cursors set.

    The expected message carries the offset reduced by the iterator's
    ``_skipped_results`` and the limit reduced by its ``num_results``.
    """
    from google.cloud.datastore._generated import query_pb2
    from google.cloud.datastore.query import Query

    # Each ``*_raw`` value is the base64 decoding of its cursor string.
    begin_cursor, begin_raw = 'abcd', b'i\xb7\x1d'
    finish_cursor, finish_raw = 'wxyz', b'\xc3\x1c\xb3'
    max_items = 15
    skip = 9

    client = _Client(None, None)
    iterator = self._make_one(
        Query(client),
        client,
        limit=max_items,
        offset=skip,
        start_cursor=begin_cursor,
        end_cursor=finish_cursor
    )
    self.assertEqual(iterator.max_results, max_items)

    # Simulate an iterator that already returned and skipped some results.
    iterator.num_results = 4
    iterator._skipped_results = 1

    actual_pb = iterator._build_protobuf()

    remaining_offset = skip - iterator._skipped_results
    expected_pb = query_pb2.Query(
        start_cursor=begin_raw,
        end_cursor=finish_raw,
        offset=remaining_offset
    )
    expected_pb.limit.value = max_items - iterator.num_results
    self.assertEqual(actual_pb, expected_pb)
def test__build_protobuf_all_values_except_offset(self):
    """Build a query protobuf with limit and both cursors, but no offset.

    Companion of ``test__build_protobuf_all_values_except_start_and_end_cursor``:
    the two tests cover mutually exclusive states, because the offset is
    ignored if a start_cursor is supplied.
    """
    from google.cloud.datastore_v1.proto import query_pb2
    from google.cloud.datastore.query import Query

    # Each ``*_raw`` value is the base64 decoding of its cursor string.
    begin_cursor, begin_raw = "abcd", b"i\xb7\x1d"
    finish_cursor, finish_raw = "wxyz", b"\xc3\x1c\xb3"
    max_items = 15

    client = _Client(None)
    iterator = self._make_one(
        Query(client),
        client,
        limit=max_items,
        start_cursor=begin_cursor,
        end_cursor=finish_cursor
    )
    self.assertEqual(iterator.max_results, max_items)

    # Simulate an iterator that already returned and skipped some results.
    iterator.num_results = 4
    iterator._skipped_results = 1

    actual_pb = iterator._build_protobuf()

    # No offset is expected in the message; only cursors and the limit.
    expected_pb = query_pb2.Query(
        start_cursor=begin_raw,
        end_cursor=finish_raw
    )
    expected_pb.limit.value = max_items - iterator.num_results
    self.assertEqual(actual_pb, expected_pb)
def test__build_protobuf_all_values_except_start_and_end_cursor(self):
    """Build a query protobuf with limit and offset, but no cursors.

    Companion of ``test__build_protobuf_all_values_except_offset``: the
    two tests cover mutually exclusive states, because the offset is
    ignored if a start_cursor is supplied.
    """
    from google.cloud.datastore_v1.proto import query_pb2
    from google.cloud.datastore.query import Query

    max_items = 15
    skip = 9

    client = _Client(None)
    iterator = self._make_one(
        Query(client),
        client,
        limit=max_items,
        offset=skip
    )
    self.assertEqual(iterator.max_results, max_items)

    # Simulate an iterator that already returned some results.
    iterator.num_results = 4

    actual_pb = iterator._build_protobuf()

    remaining_offset = skip - iterator._skipped_results
    expected_pb = query_pb2.Query(offset=remaining_offset)
    expected_pb.limit.value = max_items - iterator.num_results
    self.assertEqual(actual_pb, expected_pb)
def test__build_protobuf_empty(self):
    """An iterator created without options builds a default ``Query`` pb."""
    from google.cloud.datastore_v1.proto import query_pb2
    from google.cloud.datastore.query import Query

    client = _Client(None)
    iterator = self._make_one(Query(client), client)

    built_pb = iterator._build_protobuf()

    # With nothing configured, the message must equal a freshly built one.
    self.assertEqual(built_pb, query_pb2.Query())
def test__next_page_no_more(self):
    """``_next_page`` returns ``None`` and never calls ``run_query``
    once ``_more_results`` is false.
    """
    from google.cloud.datastore.query import Query

    api = _make_datastore_api()
    client = _Client(None, datastore_api=api)
    iterator = self._make_one(Query(client), client)

    # Flag the iterator as exhausted before requesting another page.
    iterator._more_results = False
    next_page = iterator._next_page()

    self.assertIsNone(next_page)
    api.run_query.assert_not_called()
def test__next_page_no_more(self):
    """``_next_page`` returns ``None`` and records no connection calls
    once ``_more_results`` is false.
    """
    from google.cloud.datastore.query import Query

    conn = _Connection()
    client = _Client(None, conn)
    iterator = self._make_one(Query(client), client)

    # Flag the iterator as exhausted before requesting another page.
    iterator._more_results = False
    next_page = iterator._next_page()

    self.assertIsNone(next_page)
    self.assertEqual(conn._called_with, [])
def _next_page_helper(self, txn_id=None, retry=None, timeout=None):
    """Fetch one page and verify the ``run_query`` request that was sent.

    :param txn_id: When not ``None``, the client is given a mock
                   transaction with this id and the expected read options
                   carry it.
    :param retry: When not ``None``, forwarded to the iterator and
                  expected on the ``run_query`` call.
    :param timeout: When not ``None``, forwarded to the iterator and
                    expected on the ``run_query`` call.
    """
    from google.api_core import page_iterator
    from google.cloud.datastore_v1.types import datastore as datastore_pb2
    from google.cloud.datastore_v1.types import entity as entity_pb2
    from google.cloud.datastore_v1.types import query as query_pb2
    from google.cloud.datastore.query import Query

    project = "prujekt"
    response = _make_query_response(
        [],
        b"",
        query_pb2.QueryResultBatch.MoreResultsType.NOT_FINISHED,
        0,
    )
    ds_api = _make_datastore_api(response)

    # Only attach a transaction to the client when an id was requested.
    client_options = {"datastore_api": ds_api}
    if txn_id is not None:
        client_options["transaction"] = mock.Mock(id=txn_id, spec=["id"])
    client = _Client(project, **client_options)

    # Forward only the call options that were explicitly supplied.
    call_options = {
        name: value
        for name, value in (("retry", retry), ("timeout", timeout))
        if value is not None
    }

    iterator = self._make_one(Query(client), client, **call_options)
    page = iterator._next_page()

    self.assertIsInstance(page, page_iterator.Page)
    self.assertIs(page._parent, iterator)

    expected_read_options = (
        datastore_pb2.ReadOptions()
        if txn_id is None
        else datastore_pb2.ReadOptions(transaction=txn_id)
    )
    expected_request = {
        "project_id": project,
        "partition_id": entity_pb2.PartitionId(project_id=project),
        "read_options": expected_read_options,
        "query": query_pb2.Query(),
    }
    ds_api.run_query.assert_called_once_with(
        request=expected_request, **call_options
    )
def query(self, **kwargs):
    """Create a :class:`google.cloud.datastore.query.Query` bound to us.

    The query always receives this client and our ``project``; our
    ``namespace`` is used unless the caller supplies one.

    Using query to search a datastore:

    .. code-block:: python

      >>> from google.cloud import datastore
      >>> client = datastore.Client()
      >>> query = client.query(kind='MyKind')
      >>> query.add_filter('property', '=', 'val')

    Using the query iterator's
    :meth:`~google.cloud.datastore.query.Iterator.next_page` method:

    .. code-block:: python

      >>> query_iter = query.fetch()
      >>> entities, more_results, cursor = query_iter.next_page()
      >>> entities
      [<list of Entity unmarshalled from protobuf>]
      >>> more_results
      <boolean of more results>
      >>> cursor
      <string containing cursor where fetch stopped>

    Under the hood this is doing:

    .. code-block:: python

      >>> connection.run_query('project', query.to_protobuf())
      [<list of Entity Protobufs>], cursor, more_results, skipped_results

    :type kwargs: dict
    :param kwargs: Parameters for initializing an instance of
                   :class:`google.cloud.datastore.query.Query`.

    :rtype: :class:`google.cloud.datastore.query.Query`
    :returns: An instance of :class:`google.cloud.datastore.query.Query`
    :raises TypeError: if ``client`` or ``project`` is passed in ``kwargs``.
    """
    # These two values are supplied by the proxy itself; ``client`` is
    # rejected before ``project``.
    if 'client' in kwargs:
        raise TypeError('Cannot pass client')
    if 'project' in kwargs:
        raise TypeError('Cannot pass project')

    query_args = dict(kwargs, project=self.project)
    # Fall back to the client's namespace only when none was requested.
    if 'namespace' not in query_args:
        query_args['namespace'] = self.namespace
    return Query(self, **query_args)
def test__next_page_w_skipped_lt_offset(self):
    """``_next_page`` issues a second request when the first one skipped
    fewer results than the requested offset.

    The API stub is given two responses (skipping 100, then 50) for an
    offset of 150; the second expected request starts at the first
    response's skipped cursor with the offset reduced by 100.
    """
    from google.api_core import page_iterator
    from google.cloud.datastore_v1.types import datastore as datastore_pb2
    from google.cloud.datastore_v1.types import entity as entity_pb2
    from google.cloud.datastore_v1.types import query as query_pb2
    from google.cloud.datastore.query import Query

    project = "prujekt"
    offset = 150
    skipped_1, skipped_cursor_1 = 100, b"DEADBEEF"
    skipped_2, skipped_cursor_2 = 50, b"FACEDACE"
    not_finished = query_pb2.QueryResultBatch.MoreResultsType.NOT_FINISHED

    first_response = _make_query_response([], b"", not_finished, skipped_1)
    first_response.batch.skipped_cursor = skipped_cursor_1
    second_response = _make_query_response([], b"", not_finished, skipped_2)
    second_response.batch.skipped_cursor = skipped_cursor_2

    ds_api = _make_datastore_api(first_response, second_response)
    client = _Client(project, datastore_api=ds_api)
    iterator = self._make_one(Query(client), client, offset=offset)

    page = iterator._next_page()

    self.assertIsInstance(page, page_iterator.Page)
    self.assertIs(page._parent, iterator)

    partition_id = entity_pb2.PartitionId(project_id=project)
    read_options = datastore_pb2.ReadOptions()
    expected_queries = (
        query_pb2.Query(offset=offset),
        query_pb2.Query(
            start_cursor=skipped_cursor_1, offset=(offset - skipped_1)
        ),
    )

    # One expected ``run_query`` call per query protobuf, in order.
    expected_calls = []
    for query_pb in expected_queries:
        request = {
            "project_id": project,
            "partition_id": partition_id,
            "read_options": read_options,
            "query": query_pb,
        }
        expected_calls.append(mock.call(request=request))

    self.assertEqual(ds_api.run_query.call_args_list, expected_calls)
def test__next_page(self):
    """``_next_page`` returns a ``Page`` owned by the iterator and the
    connection records a single call with a default query protobuf.
    """
    from google.cloud.iterator import Page
    from google.cloud.proto.datastore.v1 import query_pb2
    from google.cloud.datastore.query import Query

    project = 'prujekt'
    conn = _Connection()
    response = _make_query_response(
        [], b'', query_pb2.QueryResultBatch.NOT_FINISHED, 0)
    # Queue the single response the connection stub should hand back.
    conn._results = [response]

    client = _Client(project, conn)
    iterator = self._make_one(Query(client), client)

    page = iterator._next_page()

    self.assertIsInstance(page, Page)
    self.assertIs(page._parent, iterator)

    expected_call = {
        'query_pb': query_pb2.Query(),
        'project': project,
        'namespace': None,
        'transaction_id': None,
    }
    self.assertEqual(conn._called_with, [expected_call])
def _next_page_helper(self, txn_id=None):
    """Fetch one page and verify the arguments passed to ``run_query``.

    :param txn_id: When not ``None``, the client is given a mock
                   transaction with this id and the expected read options
                   carry it.
    """
    from google.api_core import page_iterator
    from google.cloud.proto.datastore.v1 import datastore_pb2
    from google.cloud.proto.datastore.v1 import entity_pb2
    from google.cloud.proto.datastore.v1 import query_pb2
    from google.cloud.datastore.query import Query

    project = 'prujekt'
    response = _make_query_response(
        [], b'', query_pb2.QueryResultBatch.NOT_FINISHED, 0)
    ds_api = _make_datastore_api(response)

    # Only attach a transaction to the client when an id was requested.
    client_options = {'datastore_api': ds_api}
    if txn_id is not None:
        client_options['transaction'] = mock.Mock(id=txn_id, spec=['id'])
    client = _Client(project, **client_options)

    iterator = self._make_one(Query(client), client)
    page = iterator._next_page()

    self.assertIsInstance(page, page_iterator.Page)
    self.assertIs(page._parent, iterator)

    expected_partition = entity_pb2.PartitionId(project_id=project)
    expected_read_options = (
        datastore_pb2.ReadOptions()
        if txn_id is None
        else datastore_pb2.ReadOptions(transaction=txn_id)
    )
    ds_api.run_query.assert_called_once_with(
        project,
        expected_partition,
        expected_read_options,
        query=query_pb2.Query()
    )
def query(self, **kwargs):
    """Create a :class:`google.cloud.datastore.query.Query` bound to us.

    The query always receives this client and our ``project``; our
    ``namespace`` is used unless the caller supplies one.

    Using query to search a datastore:

    .. code-block:: python

      >>> from google.cloud import datastore
      >>> client = datastore.Client()
      >>> query = client.query(kind='MyKind')
      >>> query.add_filter('property', '=', 'val')

    Using the query iterator

    .. code-block:: python

      >>> query_iter = query.fetch()
      >>> for entity in query_iter:
      ...     do_something(entity)

    or manually page through results

    .. code-block:: python

      >>> query_iter = query.fetch(start_cursor='2mdd223i944')
      >>> pages = query_iter.pages
      >>>
      >>> first_page = next(pages)
      >>> first_page_entities = list(first_page)
      >>> query_iter.next_page_token
      'abc-some-cursor'
      >>>
      >>> second_page = next(pages)
      >>> second_page_entities = list(second_page)
      >>> query_iter.next_page_token is None
      True

    Under the hood this is doing:

    .. code-block:: python

      >>> connection.run_query('project', query.to_protobuf())
      [<list of Entity Protobufs>], cursor, more_results, skipped_results

    :type kwargs: dict
    :param kwargs: Parameters for initializing an instance of
                   :class:`google.cloud.datastore.query.Query`.

    :rtype: :class:`google.cloud.datastore.query.Query`
    :returns: An instance of :class:`google.cloud.datastore.query.Query`
    :raises TypeError: if ``client`` or ``project`` is passed in ``kwargs``.
    """
    # The proxy owns these two arguments; ``client`` is rejected first.
    if 'client' in kwargs:
        raise TypeError('Cannot pass client')
    if 'project' in kwargs:
        raise TypeError('Cannot pass project')

    query_args = dict(kwargs)
    query_args['project'] = self.project
    # An explicit namespace from the caller wins over the client's own.
    if 'namespace' not in query_args:
        query_args['namespace'] = self.namespace
    return Query(self, **query_args)
def query(self, **kwargs):
    """Create a :class:`google.cloud.datastore.query.Query` bound to us.

    The query always receives this client and our ``project``; our
    ``namespace`` is used unless the caller supplies one.

    Using query to search a datastore:

    .. testsetup:: query

        from google.cloud import datastore

        client = datastore.Client()
        query = client.query(kind='_Doctest')

        def do_something(entity):
            pass

    .. doctest:: query

        >>> query = client.query(kind='MyKind')
        >>> query.add_filter('property', '=', 'val')

    Using the query iterator

    .. doctest:: query

        >>> query_iter = query.fetch()
        >>> for entity in query_iter:
        ...     do_something(entity)

    or manually page through results

    .. testsetup:: query-page

        from google.cloud import datastore
        from tests.system.test_system import Config  # system tests

        client = datastore.Client()

        key = client.key('_Doctest')
        entity1 = datastore.Entity(key=key)
        entity1['foo'] = 1337
        entity2 = datastore.Entity(key=key)
        entity2['foo'] = 42
        Config.TO_DELETE.extend([entity1, entity2])
        client.put_multi([entity1, entity2])

        query = client.query(kind='_Doctest')
        cursor = None

    .. doctest:: query-page

        >>> query_iter = query.fetch(start_cursor=cursor)
        >>> pages = query_iter.pages
        >>>
        >>> first_page = next(pages)
        >>> first_page_entities = list(first_page)
        >>> query_iter.next_page_token
        b'...'

    :type kwargs: dict
    :param kwargs: Parameters for initializing an instance of
                   :class:`~google.cloud.datastore.query.Query`.

    :rtype: :class:`~google.cloud.datastore.query.Query`
    :returns: A query object.
    :raises TypeError: if ``client`` or ``project`` is passed in ``kwargs``.
    """
    # The proxy owns these two arguments; ``client`` is rejected first.
    if 'client' in kwargs:
        raise TypeError('Cannot pass client')
    if 'project' in kwargs:
        raise TypeError('Cannot pass project')

    query_args = dict(kwargs, project=self.project)
    # An explicit namespace from the caller wins over the client's own.
    if 'namespace' not in query_args:
        query_args['namespace'] = self.namespace
    return Query(self, **query_args)
def _fetch_results(self, query) -> None:
    """Run ``query`` and populate ``self.results`` / ``self.results_returned``.

    Behaviour depends on ``self.query.kind``:

    * ``"COUNT"``: ``self.results`` becomes a one-element list holding the
      count, ``self.results_returned`` is set to 1 and the method returns.
    * ``"AVERAGE"``: raises ``ValueError``.
    * anything else: each fetched entity is passed through the
      ``EntityTransforms`` steps below and truthy results are appended to
      ``self.results``.

    :param query: the datastore query object to execute; it must provide
        ``fetch(limit=..., offset=...)`` and, for plain counts,
        ``keys_only()``.
    :raises ValueError: if ``self.query.kind`` is ``"AVERAGE"``.
    """
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later
    # Apply the namespace before excluding
    rpc = transaction._rpc(self.connection)

    # Build one key per excluded PK so results can be compared against them.
    excluded_pks = [
        rpc.key(x.kind, x.id_or_name) for x in self.query.excluded_pks
    ]

    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    # Widen the window by the number of excluded PKs; skipped when there is
    # no upper bound (or it is 0).
    excluded_pk_count = 0
    if excluded_pks and high_mark:
        excluded_pk_count = len(excluded_pks)
        high_mark += excluded_pk_count

    # ``limit`` is None when no high mark is set.
    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            if isinstance(query, meta_queries.QueryByKeys):
                # If this is a QueryByKeys, just do the datastore Get and count the results
                resultset = (
                    x.key for x in query.fetch(limit=limit, offset=offset) if x)
            else:
                # ``_Query__kind`` is the name-mangled private ``__kind``
                # attribute of the wrapped query.
                count_query = Query(query._Query__kind, keys_only=True, namespace=self.namespace)
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
            # NOTE(review): ``limit`` was widened by ``excluded_pk_count``
            # above and this count is not capped back to the requested
            # limit -- confirm whether it can over-count when the excluded
            # keys are not all inside the fetched window.
            self.results = [
                len([y for y in resultset if y not in excluded_pks])
            ]
            self.results_returned = 1
        else:
            query.keys_only()
            self.results = [
                len(list(query.fetch(limit=limit, offset=offset)))
            ]
            self.results_returned = 1
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")

    # Ensure that the results returned is reset
    self.results_returned = 0
    self.results = []

    # Column-value tuples already emitted; shared with ``dedupe`` below.
    seen = set()

    def dedupe(result):
        # Returns ``result`` unchanged when no columns apply, or None when
        # its column-value tuple has already been seen.
        # FIXME: This logic can't be right. I think we need to store the distinct fields
        # somewhere on the query
        if getattr(self.original_query, "annotation_select", None):
            columns = self.original_query.annotation_select.keys()
        else:
            columns = self.query.columns or []
        if not columns:
            return result

        key = tuple(
            [result[x] for x in self._exclude_pk(columns) if x in result])
        if key in seen:
            return None
        seen.add(key)
        return result

    for entity in query.fetch(limit=limit, offset=offset):
        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if isinstance(entity, Key):
            entity = EntityTransforms.convert_key_to_entity(entity)

        # presumably returns a falsy value for entities whose key is
        # excluded -- TODO confirm against EntityTransforms
        entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
        entity = EntityTransforms.convert_datetime_fields(
            self.query, entity)
        entity = EntityTransforms.fix_projected_values_type(
            self.query, entity)
        entity = EntityTransforms.rename_pk_field(
            self.query.model, self.query.concrete_model, entity)
        entity = EntityTransforms.process_extra_selects(self.query, entity)

        if self.query.distinct and self.query.extra_selects:
            entity = dedupe(entity)

        # Only truthy results are kept and counted.
        if entity:
            self.results.append(entity)
            self.results_returned += 1

        # Stop once the originally requested number of results (the
        # widened limit minus the excluded-PK allowance) has been kept.
        if limit and self.results_returned >= (limit - excluded_pk_count):
            break
def query(self, **kwargs):
    """Create a :class:`google.cloud.datastore.query.Query` bound to us.

    The query always receives this client and our ``project``; our
    ``namespace`` is used unless the caller supplies one.

    Using query to search a datastore:

    .. testsetup:: query

        import os
        import uuid

        from google.cloud import datastore

        unique = os.getenv('CIRCLE_BUILD_NUM', str(uuid.uuid4())[0:8])
        client = datastore.Client(namespace='ns{}'.format(unique))
        query = client.query(kind='_Doctest')

        def do_something(entity):
            pass

    .. doctest:: query

        >>> query = client.query(kind='MyKind')
        >>> query.add_filter('property', '=', 'val')

    Using the query iterator

    .. doctest:: query

        >>> query_iter = query.fetch()
        >>> for entity in query_iter:
        ...     do_something(entity)

    or manually page through results

    .. testsetup:: query-page

        import os
        import uuid

        from google.cloud import datastore
        from tests.system.test_system import Config  # system tests

        unique = os.getenv('CIRCLE_BUILD_NUM', str(uuid.uuid4())[0:8])
        client = datastore.Client(namespace='ns{}'.format(unique))

        key = client.key('_Doctest')
        entity1 = datastore.Entity(key=key)
        entity1['foo'] = 1337
        entity2 = datastore.Entity(key=key)
        entity2['foo'] = 42
        Config.TO_DELETE.extend([entity1, entity2])
        client.put_multi([entity1, entity2])

        query = client.query(kind='_Doctest')
        cursor = None

    .. doctest:: query-page

        >>> query_iter = query.fetch(start_cursor=cursor)
        >>> pages = query_iter.pages
        >>>
        >>> first_page = next(pages)
        >>> first_page_entities = list(first_page)
        >>> query_iter.next_page_token is None
        True

    :param kwargs: Parameters for initializing an instance of
                   :class:`~google.cloud.datastore.query.Query`.

    :rtype: :class:`~google.cloud.datastore.query.Query`
    :returns: A query object.
    :raises TypeError: if ``client`` or ``project`` is passed in ``kwargs``.
    """
    # The proxy owns these two arguments; ``client`` is rejected first.
    if "client" in kwargs:
        raise TypeError("Cannot pass client")
    if "project" in kwargs:
        raise TypeError("Cannot pass project")

    query_args = dict(kwargs)
    query_args["project"] = self.project
    # An explicit namespace from the caller wins over the client's own.
    if "namespace" not in query_args:
        query_args["namespace"] = self.namespace
    return Query(self, **query_args)