Example #1
    def test_get_splits_query_with_unsupported_filter(self):
        query = query_pb2.Query()
        query.kind.add()
        test_filter = query.filter.composite_filter.filters.add()
        test_filter.property_filter.op = PropertyFilter.GREATER_THAN
        self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                          2)
Example #2
    def test_get_splits_query_with_order(self):
        query = query_pb2.Query()
        query.kind.add()
        query.order.add()

        self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                          3)
Example #3
File: snippets.py  Project: dondyabla/beam
def model_datastoreio():
  """Using a Read and Write transform to read/write to Cloud Datastore."""

  import uuid
  from google.datastore.v1 import entity_pb2
  from google.datastore.v1 import query_pb2
  import googledatastore
  import apache_beam as beam
  from apache_beam.utils.pipeline_options import PipelineOptions
  from apache_beam.io.datastore.v1.datastoreio import ReadFromDatastore
  from apache_beam.io.datastore.v1.datastoreio import WriteToDatastore

  project = 'my_project'
  kind = 'my_kind'
  query = query_pb2.Query()
  query.kind.add().name = kind

  # [START model_datastoreio_read]
  p = beam.Pipeline(options=PipelineOptions())
  entities = p | 'Read From Datastore' >> ReadFromDatastore(project, query)
  # [END model_datastoreio_read]

  # [START model_datastoreio_write]
  p = beam.Pipeline(options=PipelineOptions())
  musicians = p | 'Musicians' >> beam.Create(
      ['Mozart', 'Chopin', 'Beethoven', 'Vivaldi'])

  def to_entity(content):
    entity = entity_pb2.Entity()
    googledatastore.helper.add_key_path(entity.key, kind, str(uuid.uuid4()))
    googledatastore.helper.add_properties(entity, {'content': unicode(content)})
    return entity

  entities = musicians | 'To Entity' >> beam.Map(to_entity)
  entities | 'Write To Datastore' >> WriteToDatastore(project)
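
The snippet above builds the read and write pipelines but never runs them. A minimal sketch of the missing run step, using the standard apache_beam Pipeline API (the blocking wait is available in recent Beam releases):

# Execute the pipeline constructed above.
result = p.run()
result.wait_until_finish()  # block until the pipeline completes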
Example #4
def _create_split(last_key, next_key, query):
    """Create a new {@link Query} given the query and range..

  Args:
    last_key: the previous key. If null then assumed to be the beginning.
    next_key: the next key. If null then assumed to be the end.
    query: the desired query.

  Returns:
    A split query with fetches entities in the range [last_key, next_key)
  """
    if not (last_key or next_key):
        return query

    split_query = query_pb2.Query()
    split_query.CopyFrom(query)
    composite_filter = split_query.filter.composite_filter
    composite_filter.op = CompositeFilter.AND

    if query.HasField('filter'):
        composite_filter.filters.add().CopyFrom(query.filter)

    if last_key:
        lower_bound = composite_filter.filters.add()
        lower_bound.property_filter.property.name = KEY_PROPERTY_NAME
        lower_bound.property_filter.op = PropertyFilter.GREATER_THAN_OR_EQUAL
        lower_bound.property_filter.value.key_value.CopyFrom(last_key)

    if next_key:
        upper_bound = composite_filter.filters.add()
        upper_bound.property_filter.property.name = KEY_PROPERTY_NAME
        upper_bound.property_filter.op = PropertyFilter.LESS_THAN
        upper_bound.property_filter.value.key_value.CopyFrom(next_key)

    return split_query
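
To see how this is typically driven: consecutive boundary keys (padded with None at both ends) are paired up and passed to _create_split, so every entity falls into exactly one half-open range. The helper below only illustrates that pairing and is not part of the original module:

def _make_range_splits(query, boundary_keys):
    # boundary_keys: scatter-derived split points in ascending key order.
    # Padding with None makes the first range unbounded below and the
    # last range unbounded above.
    bounds = [None] + list(boundary_keys) + [None]
    return [_create_split(last_key, next_key, query)
            for last_key, next_key in zip(bounds[:-1], bounds[1:])]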
Example #5
    def split_query(self, query, num_splits):
        """Generate dummy query splits."""
        split_queries = []
        for _ in range(0, num_splits):
            q = query_pb2.Query()
            q.CopyFrom(query)
            split_queries.append(q)
        return split_queries
Example #6
    def test_get_splits_with_multiple_splits(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 4
        num_entities = 369
        batch_size = 12

        self.check_get_splits(query, num_splits, num_entities, batch_size)
Example #7
    def test_get_splits_with_large_num_splits(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 10
        num_entities = 4
        batch_size = 10

        self.check_get_splits(query, num_splits, num_entities, batch_size)
Example #8
    def test_get_splits_with_two_splits(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 2
        num_entities = 97
        batch_size = 9

        self.check_get_splits(query, num_splits, num_entities, batch_size)
Example #9
    def test_get_splits_with_batch_size_exact_multiple(self):
        """Test get_splits when num scatter keys is a multiple of batch size."""
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 4
        num_entities = 400
        batch_size = 32

        self.check_get_splits(query, num_splits, num_entities, batch_size)
Example #10
    def test_get_splits_with_large_batch_size(self):
        """Test get_splits when all scatter keys are retured in a single req."""
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 4
        num_entities = 400
        batch_size = 500

        self.check_get_splits(query, num_splits, num_entities, batch_size)
Example #11
def make_latest_timestamp_query(namespace):
  """Make a Query to fetch the latest timestamp statistics."""
  query = query_pb2.Query()
  if namespace is None:
    query.kind.add().name = '__Stat_Total__'
  else:
    query.kind.add().name = '__Stat_Ns_Total__'

  # Descending order of `timestamp`
  datastore_helper.add_property_orders(query, "-timestamp")
  # Only get the latest entity
  query.limit.value = 1
  return query
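
__Stat_Total__ and __Stat_Ns_Total__ are Cloud Datastore's built-in statistics kinds; the single entity returned by this query carries a timestamp property, which is what make_kind_stats_query in example #15 expects as its latest_timestamp argument.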
Example #12
    def test_create_scatter_query(self):
        query = query_pb2.Query()
        kind = query.kind.add()
        kind.name = 'shakespeare-demo'
        num_splits = 10
        scatter_query = query_splitter._create_scatter_query(query, num_splits)
        self.assertEqual(scatter_query.kind[0], kind)
        self.assertEqual(scatter_query.limit.value,
                         (num_splits - 1) * query_splitter.KEYS_PER_SPLIT)
        self.assertEqual(scatter_query.order[0].direction,
                         query_pb2.PropertyOrder.ASCENDING)
        self.assertEqual(scatter_query.projection[0].property.name,
                         query_splitter.KEY_PROPERTY_NAME)
Example #13
    def run_query(self,
                  project_id,
                  partition_id,
                  read_options,
                  query=None,
                  gql_query=None,
                  options=None):
        """
        Queries for entities.

        Example:
          >>> from google.cloud.gapic.datastore.v1 import datastore_api
          >>> from google.datastore.v1 import datastore_pb2
          >>> from google.datastore.v1 import entity_pb2
          >>> api = datastore_api.DatastoreApi()
          >>> project_id = ''
          >>> partition_id = entity_pb2.PartitionId()
          >>> read_options = datastore_pb2.ReadOptions()
          >>> response = api.run_query(project_id, partition_id, read_options)

        Args:
          project_id (string): The ID of the project against which to make the request.
          partition_id (:class:`google.datastore.v1.entity_pb2.PartitionId`): Entities are partitioned into subsets, identified by a partition ID.
            Queries are scoped to a single partition.
            This partition ID is normalized with the standard default context
            partition ID.
          read_options (:class:`google.datastore.v1.datastore_pb2.ReadOptions`): The options for this query.
          query (:class:`google.datastore.v1.query_pb2.Query`): The query to run.
          gql_query (:class:`google.datastore.v1.query_pb2.GqlQuery`): The GQL query to run.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g. timeout, retries, etc.

        Returns:
          A :class:`google.datastore.v1.datastore_pb2.RunQueryResponse` instance.

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        if query is None:
            query = query_pb2.Query()
        if gql_query is None:
            gql_query = query_pb2.GqlQuery()
        request = datastore_pb2.RunQueryRequest(project_id=project_id,
                                                partition_id=partition_id,
                                                read_options=read_options,
                                                query=query,
                                                gql_query=gql_query)
        return self._run_query(request, options)
Example #14
def make_ancestor_query(kind, namespace, ancestor):
  """Creates a Cloud Datastore ancestor query.

  The returned query will fetch all the entities that have the parent key name
  set to the given `ancestor`.
  """
  ancestor_key = entity_pb2.Key()
  datastore_helper.add_key_path(ancestor_key, kind, ancestor)
  if namespace is not None:
    ancestor_key.partition_id.namespace_id = namespace

  query = query_pb2.Query()
  query.kind.add().name = kind

  datastore_helper.set_property_filter(
      query.filter, '__key__', PropertyFilter.HAS_ANCESTOR, ancestor_key)

  return query
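
A query built this way can be fed straight into the ReadFromDatastore transform shown in example #3. A minimal sketch, with the project, kind and ancestor name chosen purely for illustration:

query = make_ancestor_query('my_kind', None, 'my-ancestor-name')
entities = p | 'Read Descendants' >> ReadFromDatastore('my_project', query)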
Example #15
def make_kind_stats_query(namespace, kind, latest_timestamp):
  """Make a Query to fetch the latest kind statistics."""
  kind_stat_query = query_pb2.Query()
  if namespace is None:
    kind_stat_query.kind.add().name = '__Stat_Kind__'
  else:
    kind_stat_query.kind.add().name = '__Stat_Ns_Kind__'

  kind_filter = datastore_helper.set_property_filter(
      query_pb2.Filter(), 'kind_name', PropertyFilter.EQUAL, unicode(kind))
  timestamp_filter = datastore_helper.set_property_filter(
      query_pb2.Filter(), 'timestamp', PropertyFilter.EQUAL,
      latest_timestamp)

  datastore_helper.set_composite_filter(kind_stat_query.filter,
                                        CompositeFilter.AND, kind_filter,
                                        timestamp_filter)
  return kind_stat_query
Example #16
def _create_scatter_query(query, num_splits):
    """Creates a scatter query from the given user query."""

    scatter_query = query_pb2.Query()
    for kind in query.kind:
        scatter_kind = scatter_query.kind.add()
        scatter_kind.CopyFrom(kind)

    # ascending order
    datastore_helper.add_property_orders(scatter_query, SCATTER_PROPERTY_NAME)

    # There is a split containing entities before and after each scatter entity:
    # ||---*------*------*------*------*------*------*---||  * = scatter entity
    # If we represent each split as a region before a scatter entity, there is an
    # extra region following the last scatter point. Thus, we do not need the
    # scatter entity for the last region.
    scatter_query.limit.value = (num_splits - 1) * KEYS_PER_SPLIT
    datastore_helper.add_projection(scatter_query, KEY_PROPERTY_NAME)

    return scatter_query
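
As a worked example of the limit arithmetic: assuming KEYS_PER_SPLIT is 32 (its value in some versions of query_splitter; treat this as an assumption), requesting num_splits = 4 sets scatter_query.limit.value to (4 - 1) * 32 = 96, i.e. 32 candidate boundary keys per split region, with no keys needed for the implicit region after the last scatter entity.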
Example #17
    def setUp(self):
        self._mock_datastore = MagicMock()
        self._query = query_pb2.Query()
        self._query.kind.add().name = self._KIND
Example #18
    def setUp(self):
        self._mock_datastore = MagicMock()
        self._query = query_pb2.Query()
        self._query.kind.add().name = 'dummy_kind'
        patch_retry(self, helper)
Example #19
    def test_get_splits_query_with_offset(self):
        query = query_pb2.Query()
        query.kind.add()
        query.offset = 10
        self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                          2)
Example #20
    def test_get_splits_query_with_multiple_kinds(self):
        query = query_pb2.Query()
        query.kind.add()
        query.kind.add()
        self.assertRaises(ValueError, query_splitter.get_splits, None, query,
                          4)