Example #1
0
    def test__build_protobuf_all_values(self):
        # Build the query protobuf with limit, offset and both cursors set,
        # after part of the result set has been consumed / skipped.
        from google.cloud.datastore._generated import query_pb2
        from google.cloud.datastore.query import Query

        max_count = 15
        skip = 9
        # The byte strings are the base64 decodings of the cursor strings.
        raw_start, encoded_start = b'i\xb7\x1d', 'abcd'
        raw_end, encoded_end = b'\xc3\x1c\xb3', 'wxyz'

        client = _Client(None, None)
        iterator = self._make_one(
            Query(client),
            client,
            limit=max_count,
            offset=skip,
            start_cursor=encoded_start,
            end_cursor=encoded_end,
        )
        self.assertEqual(iterator.max_results, max_count)

        # Simulate progress made by earlier pages.
        iterator.num_results = 4
        iterator._skipped_results = 1

        actual = iterator._build_protobuf()

        expected = query_pb2.Query(
            start_cursor=raw_start,
            end_cursor=raw_end,
            offset=skip - iterator._skipped_results,
        )
        expected.limit.value = max_count - iterator.num_results
        self.assertEqual(actual, expected)
    def test__build_protobuf_all_values_except_offset(self):
        # This test and the "all values except start and end cursor" one
        # cover mutually exclusive states: the offset is ignored if a
        # start_cursor is supplied.
        from google.cloud.datastore_v1.proto import query_pb2
        from google.cloud.datastore.query import Query

        max_count = 15
        # The byte strings are the base64 decodings of the cursor strings.
        raw_start, encoded_start = b"i\xb7\x1d", "abcd"
        raw_end, encoded_end = b"\xc3\x1c\xb3", "wxyz"

        client = _Client(None)
        iterator = self._make_one(
            Query(client),
            client,
            limit=max_count,
            start_cursor=encoded_start,
            end_cursor=encoded_end,
        )
        self.assertEqual(iterator.max_results, max_count)

        # Simulate progress made by earlier pages.
        iterator.num_results = 4
        iterator._skipped_results = 1

        actual = iterator._build_protobuf()

        expected = query_pb2.Query(start_cursor=raw_start, end_cursor=raw_end)
        expected.limit.value = max_count - iterator.num_results
        self.assertEqual(actual, expected)
Example #3
0
    def test__build_protobuf_all_values_except_start_and_end_cursor(self):
        # This test and the "all values except offset" one cover mutually
        # exclusive states: the offset is ignored if a start_cursor is
        # supplied.
        from google.cloud.datastore_v1.proto import query_pb2
        from google.cloud.datastore.query import Query

        max_count = 15
        skip = 9

        client = _Client(None)
        iterator = self._make_one(
            Query(client), client, limit=max_count, offset=skip)
        self.assertEqual(iterator.max_results, max_count)
        iterator.num_results = 4

        actual = iterator._build_protobuf()

        # ``_skipped_results`` is left at whatever the iterator initialised
        # it to; only ``num_results`` is overridden above.
        remaining_offset = skip - iterator._skipped_results
        expected = query_pb2.Query(offset=remaining_offset)
        expected.limit.value = max_count - iterator.num_results
        self.assertEqual(actual, expected)
Example #4
0
    def test__build_protobuf_empty(self):
        # With no limit, offset or cursors supplied, the built protobuf
        # must equal a default-constructed ``Query`` message.
        from google.cloud.datastore_v1.proto import query_pb2
        from google.cloud.datastore.query import Query

        client = _Client(None)
        iterator = self._make_one(Query(client), client)

        self.assertEqual(iterator._build_protobuf(), query_pb2.Query())
Example #5
0
    def test__next_page_no_more(self):
        # When the iterator is flagged as having no more results,
        # ``_next_page`` returns ``None`` without calling the API.
        from google.cloud.datastore.query import Query

        api = _make_datastore_api()
        client = _Client(None, datastore_api=api)
        iterator = self._make_one(Query(client), client)
        iterator._more_results = False

        self.assertIsNone(iterator._next_page())
        api.run_query.assert_not_called()
Example #6
0
    def test__next_page_no_more(self):
        # When the iterator is flagged as having no more results,
        # ``_next_page`` returns ``None`` and the connection records no calls.
        from google.cloud.datastore.query import Query

        conn = _Connection()
        client = _Client(None, conn)
        iterator = self._make_one(Query(client), client)
        iterator._more_results = False

        self.assertIsNone(iterator._next_page())
        self.assertEqual(conn._called_with, [])
Example #7
0
    def _next_page_helper(self, txn_id=None, retry=None, timeout=None):
        # Shared driver for the ``_next_page`` tests: runs a single page
        # fetch against a mocked API and checks the request that was sent.
        #
        # txn_id:  when given, the client is created with a mock transaction
        #          carrying this id and the expected read options include it.
        # retry / timeout:  when given, they are passed to the iterator and
        #          are expected to be forwarded to ``run_query``.
        from google.api_core import page_iterator
        from google.cloud.datastore_v1.types import datastore as datastore_pb2
        from google.cloud.datastore_v1.types import entity as entity_pb2
        from google.cloud.datastore_v1.types import query as query_pb2
        from google.cloud.datastore.query import Query

        not_finished = query_pb2.QueryResultBatch.MoreResultsType.NOT_FINISHED
        response = _make_query_response([], b"", not_finished, 0)
        project = "prujekt"
        ds_api = _make_datastore_api(response)

        client_kwargs = {"datastore_api": ds_api}
        if txn_id is not None:
            client_kwargs["transaction"] = mock.Mock(id=txn_id, spec=["id"])
        client = _Client(project, **client_kwargs)

        query = Query(client)

        # Only forward the call options that were actually supplied.
        kwargs = {
            name: value
            for name, value in (("retry", retry), ("timeout", timeout))
            if value is not None
        }

        iterator = self._make_one(query, client, **kwargs)

        page = iterator._next_page()

        self.assertIsInstance(page, page_iterator.Page)
        self.assertIs(page._parent, iterator)

        partition_id = entity_pb2.PartitionId(project_id=project)
        read_options = (
            datastore_pb2.ReadOptions()
            if txn_id is None
            else datastore_pb2.ReadOptions(transaction=txn_id)
        )
        expected_request = {
            "project_id": project,
            "partition_id": partition_id,
            "read_options": read_options,
            "query": query_pb2.Query(),
        }
        ds_api.run_query.assert_called_once_with(
            request=expected_request, **kwargs)
    def query(self, **kwargs):
        """Proxy to :class:`google.cloud.datastore.query.Query`.

        Passes our ``project`` and, unless the caller supplies one, our
        ``namespace``.

        Using query to search a datastore:

        .. code-block:: python

          >>> from google.cloud import datastore
          >>> client = datastore.Client()
          >>> query = client.query(kind='MyKind')
          >>> query.add_filter('property', '=', 'val')

        Using the query iterator's
        :meth:`~google.cloud.datastore.query.Iterator.next_page` method:

        .. code-block:: python

          >>> query_iter = query.fetch()
          >>> entities, more_results, cursor = query_iter.next_page()
          >>> entities
          [<list of Entity unmarshalled from protobuf>]
          >>> more_results
          <boolean of more results>
          >>> cursor
          <string containing cursor where fetch stopped>

        Under the hood this is doing:

        .. code-block:: python

          >>> connection.run_query('project', query.to_protobuf())
          [<list of Entity Protobufs>], cursor, more_results, skipped_results

        :type kwargs: dict
        :param kwargs: Parameters for initializing an instance of
                       :class:`google.cloud.datastore.query.Query`.

        :rtype: :class:`google.cloud.datastore.query.Query`
        :returns: An instance of :class:`google.cloud.datastore.query.Query`
        :raises TypeError: if ``client`` or ``project`` is passed.
        """
        # These two are always supplied by this client, never by the caller.
        for reserved, message in (
                ('client', 'Cannot pass client'),
                ('project', 'Cannot pass project')):
            if reserved in kwargs:
                raise TypeError(message)

        query_kwargs = dict(kwargs, project=self.project)
        if 'namespace' not in query_kwargs:
            query_kwargs['namespace'] = self.namespace
        return Query(self, **query_kwargs)
    def test__next_page_w_skipped_lt_offset(self):
        # The first response skips fewer results than the requested offset,
        # so a second request is expected: it resumes from the first
        # response's skipped cursor with the remaining offset.
        from google.api_core import page_iterator
        from google.cloud.datastore_v1.types import datastore as datastore_pb2
        from google.cloud.datastore_v1.types import entity as entity_pb2
        from google.cloud.datastore_v1.types import query as query_pb2
        from google.cloud.datastore.query import Query

        project = "prujekt"
        first_skipped, first_cursor = 100, b"DEADBEEF"
        second_skipped, second_cursor = 50, b"FACEDACE"

        not_finished = query_pb2.QueryResultBatch.MoreResultsType.NOT_FINISHED

        responses = []
        for skipped, cursor in (
            (first_skipped, first_cursor),
            (second_skipped, second_cursor),
        ):
            response = _make_query_response([], b"", not_finished, skipped)
            response.batch.skipped_cursor = cursor
            responses.append(response)

        ds_api = _make_datastore_api(*responses)
        client = _Client(project, datastore_api=ds_api)

        offset = 150
        iterator = self._make_one(Query(client), client, offset=offset)

        page = iterator._next_page()

        self.assertIsInstance(page, page_iterator.Page)
        self.assertIs(page._parent, iterator)

        partition_id = entity_pb2.PartitionId(project_id=project)
        read_options = datastore_pb2.ReadOptions()

        expected_queries = [
            query_pb2.Query(offset=offset),
            query_pb2.Query(
                start_cursor=first_cursor, offset=(offset - first_skipped)),
        ]
        expected_calls = [
            mock.call(
                request={
                    "project_id": project,
                    "partition_id": partition_id,
                    "read_options": read_options,
                    "query": expected_query,
                })
            for expected_query in expected_queries
        ]
        self.assertEqual(ds_api.run_query.call_args_list, expected_calls)
Example #10
0
    def test__next_page(self):
        # Fetch one page through a stub connection and check both the page
        # returned and the arguments recorded by the connection.
        from google.cloud.iterator import Page
        from google.cloud.proto.datastore.v1 import query_pb2
        from google.cloud.datastore.query import Query

        project = 'prujekt'
        conn = _Connection()
        response = _make_query_response(
            [], b'', query_pb2.QueryResultBatch.NOT_FINISHED, 0)
        conn._results = [response]

        client = _Client(project, conn)
        iterator = self._make_one(Query(client), client)

        page = iterator._next_page()
        self.assertIsInstance(page, Page)
        self.assertIs(page._parent, iterator)

        expected_call = {
            'query_pb': query_pb2.Query(),
            'project': project,
            'namespace': None,
            'transaction_id': None,
        }
        self.assertEqual(conn._called_with, [expected_call])
Example #11
0
    def _next_page_helper(self, txn_id=None):
        # Shared driver for the ``_next_page`` tests: runs a single page
        # fetch against a mocked API and checks the arguments passed to
        # ``run_query``.  When ``txn_id`` is given, the client is created
        # with a mock transaction carrying that id and the expected read
        # options include it.
        from google.api_core import page_iterator
        from google.cloud.proto.datastore.v1 import datastore_pb2
        from google.cloud.proto.datastore.v1 import entity_pb2
        from google.cloud.proto.datastore.v1 import query_pb2
        from google.cloud.datastore.query import Query

        project = 'prujekt'
        response = _make_query_response(
            [], b'', query_pb2.QueryResultBatch.NOT_FINISHED, 0)
        ds_api = _make_datastore_api(response)

        client_kwargs = {'datastore_api': ds_api}
        if txn_id is not None:
            client_kwargs['transaction'] = mock.Mock(id=txn_id, spec=['id'])
        client = _Client(project, **client_kwargs)

        iterator = self._make_one(Query(client), client)

        page = iterator._next_page()
        self.assertIsInstance(page, page_iterator.Page)
        self.assertIs(page._parent, iterator)

        partition_id = entity_pb2.PartitionId(project_id=project)
        read_options = (
            datastore_pb2.ReadOptions()
            if txn_id is None
            else datastore_pb2.ReadOptions(transaction=txn_id)
        )
        ds_api.run_query.assert_called_once_with(
            project, partition_id, read_options, query=query_pb2.Query())
Example #12
0
    def query(self, **kwargs):
        """Proxy to :class:`google.cloud.datastore.query.Query`.

        Passes our ``project`` and, unless the caller supplies one, our
        ``namespace``.

        Using query to search a datastore:

        .. code-block:: python

          >>> from google.cloud import datastore
          >>> client = datastore.Client()
          >>> query = client.query(kind='MyKind')
          >>> query.add_filter('property', '=', 'val')

        Using the query iterator

        .. code-block:: python

          >>> query_iter = query.fetch()
          >>> for entity in query_iter:
          ...     do_something(entity)

        or manually page through results

        .. code-block:: python

          >>> query_iter = query.fetch(start_cursor='2mdd223i944')
          >>> pages = query_iter.pages
          >>>
          >>> first_page = next(pages)
          >>> first_page_entities = list(first_page)
          >>> query_iter.next_page_token
          'abc-some-cursor'
          >>>
          >>> second_page = next(pages)
          >>> second_page_entities = list(second_page)
          >>> query_iter.next_page_token is None
          True

        Under the hood this is doing:

        .. code-block:: python

          >>> connection.run_query('project', query.to_protobuf())
          [<list of Entity Protobufs>], cursor, more_results, skipped_results

        :type kwargs: dict
        :param kwargs: Parameters for initializing an instance of
                       :class:`google.cloud.datastore.query.Query`.

        :rtype: :class:`google.cloud.datastore.query.Query`
        :returns: An instance of :class:`google.cloud.datastore.query.Query`
        :raises TypeError: if ``client`` or ``project`` is passed.
        """
        # These two are always supplied by this client, never by the caller.
        for reserved, message in (
                ('client', 'Cannot pass client'),
                ('project', 'Cannot pass project')):
            if reserved in kwargs:
                raise TypeError(message)

        query_kwargs = dict(kwargs, project=self.project)
        if 'namespace' not in query_kwargs:
            query_kwargs['namespace'] = self.namespace
        return Query(self, **query_kwargs)
Example #13
0
    def query(self, **kwargs):
        """Proxy to :class:`google.cloud.datastore.query.Query`.

        Passes our ``project`` and, unless the caller supplies one, our
        ``namespace``.

        Using query to search a datastore:

        .. testsetup:: query

           from google.cloud import datastore

           client = datastore.Client()
           query = client.query(kind='_Doctest')

           def do_something(entity):
               pass

        .. doctest:: query

           >>> query = client.query(kind='MyKind')
           >>> query.add_filter('property', '=', 'val')

        Using the query iterator

        .. doctest:: query

           >>> query_iter = query.fetch()
           >>> for entity in query_iter:
           ...     do_something(entity)

        or manually page through results

        .. testsetup:: query-page

           from google.cloud import datastore
           from tests.system.test_system import Config  # system tests

           client = datastore.Client()

           key = client.key('_Doctest')
           entity1 = datastore.Entity(key=key)
           entity1['foo'] = 1337
           entity2 = datastore.Entity(key=key)
           entity2['foo'] = 42
           Config.TO_DELETE.extend([entity1, entity2])
           client.put_multi([entity1, entity2])

           query = client.query(kind='_Doctest')
           cursor = None

        .. doctest:: query-page

           >>> query_iter = query.fetch(start_cursor=cursor)
           >>> pages = query_iter.pages
           >>>
           >>> first_page = next(pages)
           >>> first_page_entities = list(first_page)
           >>> query_iter.next_page_token
           b'...'

        :type kwargs: dict
        :param kwargs: Parameters for initializing an instance of
                       :class:`~google.cloud.datastore.query.Query`.

        :rtype: :class:`~google.cloud.datastore.query.Query`
        :returns: A query object.
        :raises TypeError: if ``client`` or ``project`` is passed.
        """
        # These two are always supplied by this client, never by the caller.
        for reserved, message in (
                ('client', 'Cannot pass client'),
                ('project', 'Cannot pass project')):
            if reserved in kwargs:
                raise TypeError(message)

        query_kwargs = dict(kwargs, project=self.project)
        if 'namespace' not in query_kwargs:
            query_kwargs['namespace'] = self.namespace
        return Query(self, **query_kwargs)
Example #14
0
    def _fetch_results(self, query):
        """Run ``query`` and store the outcome on ``self``.

        Populates ``self.results`` and ``self.results_returned``; nothing is
        returned to the caller.

        * When ``self.query.kind`` is ``"COUNT"``, ``self.results`` becomes a
          one-element list holding the count and ``self.results_returned``
          is set to 1.
        * When ``self.query.kind`` is ``"AVERAGE"``, ``ValueError`` is raised.
        * Otherwise every item yielded by ``query.fetch(...)`` is passed
          through the ``EntityTransforms`` helpers and, if still truthy,
          appended to ``self.results``.

        :param query: The object to execute. It must provide
                      ``fetch(limit=..., offset=...)``; the COUNT branch
                      additionally checks whether it is a
                      ``meta_queries.QueryByKeys``.

        :raises ValueError: if ``self.query.kind`` is ``"AVERAGE"``.
        """
        # If we're manually excluding PKs, and we've specified a limit to the results
        # we need to make sure that we grab more than we were asked for otherwise we could filter
        # out too many! These are again limited back to the original request limit
        # while we're processing the results later
        # Apply the namespace before excluding
        rpc = transaction._rpc(self.connection)

        excluded_pks = [
            rpc.key(x.kind, x.id_or_name) for x in self.query.excluded_pks
        ]

        high_mark = self.query.high_mark
        low_mark = self.query.low_mark

        # Widen the upper bound by the number of excluded keys so that
        # dropping them later cannot leave us short of the requested limit.
        excluded_pk_count = 0
        if excluded_pks and high_mark:
            excluded_pk_count = len(excluded_pks)
            high_mark += excluded_pk_count

        # No high mark means no limit; a missing low mark is treated as 0.
        limit = None if high_mark is None else (high_mark - (low_mark or 0))
        offset = low_mark or 0

        if self.query.kind == "COUNT":
            if excluded_pks:
                # If we're excluding pks, relying on a traditional count won't work
                # so we have two options:
                # 1. Do a keys_only query instead and count the results excluding keys
                # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
                # Here I've favoured option one as it means a single RPC call. Testing locally
                # didn't seem to indicate much of a performance difference, even when doing the pk__in
                # with GetAsync while the count was running. That might not be true of prod though so
                # if anyone comes up with a faster idea let me know!
                if isinstance(query, meta_queries.QueryByKeys):
                    # If this is a QueryByKeys, just do the datastore Get and count the results
                    resultset = (
                        x.key for x in query.fetch(limit=limit, offset=offset)
                        if x)
                else:
                    # NOTE(review): this branch uses ``update()`` / ``Run()``
                    # and the name-mangled ``_Query__kind`` attribute, while
                    # the rest of this method uses ``fetch()`` -- confirm the
                    # ``Query`` class in scope supports this API.
                    count_query = Query(query._Query__kind,
                                        keys_only=True,
                                        namespace=self.namespace)
                    count_query.update(query)
                    resultset = count_query.Run(limit=limit, offset=offset)
                # Count only the keys that are not in the exclusion list.
                self.results = [
                    len([y for y in resultset if y not in excluded_pks])
                ]
                self.results_returned = 1
            else:
                query.keys_only()

                self.results = [
                    len(list(query.fetch(limit=limit, offset=offset)))
                ]
                self.results_returned = 1
            return
        elif self.query.kind == "AVERAGE":
            raise ValueError("AVERAGE not yet supported")

        # Ensure that the results returned is reset
        self.results_returned = 0
        self.results = []

        # Column-value tuples already emitted; shared with ``dedupe`` below.
        seen = set()

        def dedupe(result):
            # Returns ``result`` the first time its column values are seen
            # and ``None`` for repeats; results are passed through unchanged
            # when there are no columns to compare on.
            # FIXME: This logic can't be right. I think we need to store the distinct fields
            # somewhere on the query
            if getattr(self.original_query, "annotation_select", None):
                columns = self.original_query.annotation_select.keys()
            else:
                columns = self.query.columns or []
            if not columns:
                return result

            key = tuple(
                [result[x] for x in self._exclude_pk(columns) if x in result])
            if key in seen:
                return None
            seen.add(key)
            return result

        for entity in query.fetch(limit=limit, offset=offset):
            # If this is a keys only query, we need to generate a fake entity
            # for each key in the result set
            if isinstance(entity, Key):
                entity = EntityTransforms.convert_key_to_entity(entity)

            entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
            entity = EntityTransforms.convert_datetime_fields(
                self.query, entity)
            entity = EntityTransforms.fix_projected_values_type(
                self.query, entity)
            entity = EntityTransforms.rename_pk_field(
                self.query.model, self.query.concrete_model, entity)
            entity = EntityTransforms.process_extra_selects(self.query, entity)

            if self.query.distinct and self.query.extra_selects:
                entity = dedupe(entity)

            # A falsy entity here is skipped rather than stored.
            if entity:
                self.results.append(entity)
                self.results_returned += 1

            # ``limit`` was inflated by ``excluded_pk_count`` above; stop once
            # the originally requested number of results has been collected.
            if limit and self.results_returned >= (limit - excluded_pk_count):
                break
Example #15
0
    def query(self, **kwargs):
        """Proxy to :class:`google.cloud.datastore.query.Query`.

        Passes our ``project`` and, unless the caller supplies one, our
        ``namespace``.

        Using query to search a datastore:

        .. testsetup:: query

            import os
            import uuid

            from google.cloud import datastore

            unique = os.getenv('CIRCLE_BUILD_NUM', str(uuid.uuid4())[0:8])
            client = datastore.Client(namespace='ns{}'.format(unique))
            query = client.query(kind='_Doctest')

            def do_something(entity):
                pass

        .. doctest:: query

            >>> query = client.query(kind='MyKind')
            >>> query.add_filter('property', '=', 'val')

        Using the query iterator

        .. doctest:: query

            >>> query_iter = query.fetch()
            >>> for entity in query_iter:
            ...     do_something(entity)

        or manually page through results

        .. testsetup:: query-page

            import os
            import uuid

            from google.cloud import datastore
            from tests.system.test_system import Config  # system tests

            unique = os.getenv('CIRCLE_BUILD_NUM', str(uuid.uuid4())[0:8])
            client = datastore.Client(namespace='ns{}'.format(unique))

            key = client.key('_Doctest')
            entity1 = datastore.Entity(key=key)
            entity1['foo'] = 1337
            entity2 = datastore.Entity(key=key)
            entity2['foo'] = 42
            Config.TO_DELETE.extend([entity1, entity2])
            client.put_multi([entity1, entity2])

            query = client.query(kind='_Doctest')
            cursor = None

        .. doctest:: query-page

            >>> query_iter = query.fetch(start_cursor=cursor)
            >>> pages = query_iter.pages
            >>>
            >>> first_page = next(pages)
            >>> first_page_entities = list(first_page)
            >>> query_iter.next_page_token is None
            True

        :param kwargs: Parameters for initializing an instance of
                       :class:`~google.cloud.datastore.query.Query`.

        :rtype: :class:`~google.cloud.datastore.query.Query`
        :returns: A query object.
        :raises TypeError: if ``client`` or ``project`` is passed.
        """
        # These two are always supplied by this client, never by the caller.
        for reserved, message in (
            ("client", "Cannot pass client"),
            ("project", "Cannot pass project"),
        ):
            if reserved in kwargs:
                raise TypeError(message)

        query_kwargs = dict(kwargs, project=self.project)
        if "namespace" not in query_kwargs:
            query_kwargs["namespace"] = self.namespace
        return Query(self, **query_kwargs)