def test_get_splits_query_with_unsupported_filter(self):
    query = query_pb2.Query()
    query.kind.add()
    test_filter = query.filter.composite_filter.filters.add()
    test_filter.property_filter.op = PropertyFilter.GREATER_THAN
    self.assertRaises(ValueError, query_splitter.get_splits, None, query, 2)
def test_get_splits_query_with_order(self):
    query = query_pb2.Query()
    query.kind.add()
    query.order.add()
    self.assertRaises(ValueError, query_splitter.get_splits, None, query, 3)
def model_datastoreio():
    """Using a Read and Write transform to read/write to Cloud Datastore."""
    import uuid
    from google.datastore.v1 import entity_pb2
    from google.datastore.v1 import query_pb2
    import googledatastore
    import apache_beam as beam
    from apache_beam.utils.pipeline_options import PipelineOptions
    from apache_beam.io.datastore.v1.datastoreio import ReadFromDatastore
    from apache_beam.io.datastore.v1.datastoreio import WriteToDatastore

    project = 'my_project'
    kind = 'my_kind'
    query = query_pb2.Query()
    query.kind.add().name = kind

    # [START model_datastoreio_read]
    p = beam.Pipeline(options=PipelineOptions())
    entities = p | 'Read From Datastore' >> ReadFromDatastore(project, query)
    # [END model_datastoreio_read]

    # [START model_datastoreio_write]
    p = beam.Pipeline(options=PipelineOptions())
    musicians = p | 'Musicians' >> beam.Create(
        ['Mozart', 'Chopin', 'Beethoven', 'Vivaldi'])

    def to_entity(content):
        entity = entity_pb2.Entity()
        googledatastore.helper.add_key_path(entity.key, kind, str(uuid.uuid4()))
        googledatastore.helper.add_properties(
            entity, {'content': unicode(content)})
        return entity

    entities = musicians | 'To Entity' >> beam.Map(to_entity)
    entities | 'Write To Datastore' >> WriteToDatastore(project)
    # [END model_datastoreio_write]
def _create_split(last_key, next_key, query):
    """Creates a new query restricted to the given key range.

    Args:
      last_key: the previous key. If None, assumed to be the beginning.
      next_key: the next key. If None, assumed to be the end.
      query: the desired query.

    Returns:
      A split query that fetches entities in the range [last_key, next_key).
    """
    if not (last_key or next_key):
        return query

    split_query = query_pb2.Query()
    split_query.CopyFrom(query)
    composite_filter = split_query.filter.composite_filter
    composite_filter.op = CompositeFilter.AND

    if query.HasField('filter'):
        composite_filter.filters.add().CopyFrom(query.filter)

    if last_key:
        lower_bound = composite_filter.filters.add()
        lower_bound.property_filter.property.name = KEY_PROPERTY_NAME
        lower_bound.property_filter.op = PropertyFilter.GREATER_THAN_OR_EQUAL
        lower_bound.property_filter.value.key_value.CopyFrom(last_key)

    if next_key:
        upper_bound = composite_filter.filters.add()
        upper_bound.property_filter.property.name = KEY_PROPERTY_NAME
        upper_bound.property_filter.op = PropertyFilter.LESS_THAN
        upper_bound.property_filter.value.key_value.CopyFrom(next_key)

    return split_query
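# A minimal usage sketch, not from the source: given a sorted list of
# boundary keys (e.g. every KEYS_PER_SPLIT-th key returned by the scatter
# query), each adjacent pair bounds one sub-query built by _create_split
# above. The helper name `splits_from_boundaries` is hypothetical.
def splits_from_boundaries(query, boundary_keys):
    bounds = [None] + list(boundary_keys) + [None]
    return [_create_split(last_key, next_key, query)
            for last_key, next_key in zip(bounds[:-1], bounds[1:])]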
def split_query(self, query, num_splits):
    """Generate dummy query splits."""
    split_queries = []
    for _ in range(0, num_splits):
        q = query_pb2.Query()
        q.CopyFrom(query)
        split_queries.append(q)
    return split_queries
def test_get_splits_with_multiple_splits(self):
    query = query_pb2.Query()
    kind = query.kind.add()
    kind.name = 'shakespeare-demo'
    num_splits = 4
    num_entities = 369
    batch_size = 12
    self.check_get_splits(query, num_splits, num_entities, batch_size)
def test_get_splits_with_large_num_splits(self):
    query = query_pb2.Query()
    kind = query.kind.add()
    kind.name = 'shakespeare-demo'
    num_splits = 10
    num_entities = 4
    batch_size = 10
    self.check_get_splits(query, num_splits, num_entities, batch_size)
def test_get_splits_with_two_splits(self):
    query = query_pb2.Query()
    kind = query.kind.add()
    kind.name = 'shakespeare-demo'
    num_splits = 2
    num_entities = 97
    batch_size = 9
    self.check_get_splits(query, num_splits, num_entities, batch_size)
def test_get_splits_with_batch_size_exact_multiple(self):
    """Test get_splits when num scatter keys is a multiple of batch size."""
    query = query_pb2.Query()
    kind = query.kind.add()
    kind.name = 'shakespeare-demo'
    num_splits = 4
    num_entities = 400
    batch_size = 32
    self.check_get_splits(query, num_splits, num_entities, batch_size)
def test_get_splits_with_large_batch_size(self):
    """Test get_splits when all scatter keys are returned in a single request."""
    query = query_pb2.Query()
    kind = query.kind.add()
    kind.name = 'shakespeare-demo'
    num_splits = 4
    num_entities = 400
    batch_size = 500
    self.check_get_splits(query, num_splits, num_entities, batch_size)
def make_latest_timestamp_query(namespace):
    """Make a Query to fetch the latest timestamp statistics."""
    query = query_pb2.Query()
    if namespace is None:
        query.kind.add().name = '__Stat_Total__'
    else:
        query.kind.add().name = '__Stat_Ns_Total__'

    # Descending order of `timestamp`.
    datastore_helper.add_property_orders(query, "-timestamp")
    # Only get the latest entity.
    query.limit.value = 1
    return query
def test_create_scatter_query(self):
    query = query_pb2.Query()
    kind = query.kind.add()
    kind.name = 'shakespeare-demo'
    num_splits = 10
    scatter_query = query_splitter._create_scatter_query(query, num_splits)
    self.assertEqual(scatter_query.kind[0], kind)
    self.assertEqual(scatter_query.limit.value,
                     (num_splits - 1) * query_splitter.KEYS_PER_SPLIT)
    self.assertEqual(scatter_query.order[0].direction,
                     query_pb2.PropertyOrder.ASCENDING)
    self.assertEqual(scatter_query.projection[0].property.name,
                     query_splitter.KEY_PROPERTY_NAME)
def run_query(self, project_id, partition_id, read_options, query=None,
              gql_query=None, options=None):
    """Queries for entities.

    Example:
      >>> from google.cloud.gapic.datastore.v1 import datastore_api
      >>> from google.datastore.v1 import datastore_pb2
      >>> from google.datastore.v1 import entity_pb2
      >>> api = datastore_api.DatastoreApi()
      >>> project_id = ''
      >>> partition_id = entity_pb2.PartitionId()
      >>> read_options = datastore_pb2.ReadOptions()
      >>> response = api.run_query(project_id, partition_id, read_options)

    Args:
      project_id (string): The ID of the project against which to make the
        request.
      partition_id (:class:`google.datastore.v1.entity_pb2.PartitionId`):
        Entities are partitioned into subsets, identified by a partition ID.
        Queries are scoped to a single partition. This partition ID is
        normalized with the standard default context partition ID.
      read_options (:class:`google.datastore.v1.datastore_pb2.ReadOptions`):
        The options for this query.
      query (:class:`google.datastore.v1.query_pb2.Query`): The query to run.
      gql_query (:class:`google.datastore.v1.query_pb2.GqlQuery`): The GQL
        query to run.
      options (:class:`google.gax.CallOptions`): Overrides the default
        settings for this call, e.g. timeout, retries etc.

    Returns:
      A :class:`google.datastore.v1.datastore_pb2.RunQueryResponse` instance.

    Raises:
      :exc:`google.gax.errors.GaxError` if the RPC is aborted.
      :exc:`ValueError` if the parameters are invalid.
    """
    if query is None:
        query = query_pb2.Query()
    if gql_query is None:
        gql_query = query_pb2.GqlQuery()
    request = datastore_pb2.RunQueryRequest(
        project_id=project_id,
        partition_id=partition_id,
        read_options=read_options,
        query=query,
        gql_query=gql_query)
    return self._run_query(request, options)
def make_ancestor_query(kind, namespace, ancestor):
    """Creates a Cloud Datastore ancestor query.

    The returned query will fetch all the entities that have the parent key
    name set to the given `ancestor`.
    """
    ancestor_key = entity_pb2.Key()
    datastore_helper.add_key_path(ancestor_key, kind, ancestor)
    if namespace is not None:
        ancestor_key.partition_id.namespace_id = namespace

    query = query_pb2.Query()
    query.kind.add().name = kind

    datastore_helper.set_property_filter(
        query.filter, '__key__', PropertyFilter.HAS_ANCESTOR, ancestor_key)

    return query
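# Usage sketch (assumption, not from the source): an ancestor query built this
# way can be passed to the ReadFromDatastore transform shown in
# model_datastoreio() above; the kind, namespace, ancestor and project values
# below are placeholders.
ancestor_query = make_ancestor_query('my_kind', None, 'root')
# entities = p | 'Read' >> ReadFromDatastore('my_project', ancestor_query)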
def make_kind_stats_query(namespace, kind, latest_timestamp):
    """Make a Query to fetch the latest kind statistics."""
    kind_stat_query = query_pb2.Query()
    if namespace is None:
        kind_stat_query.kind.add().name = '__Stat_Kind__'
    else:
        kind_stat_query.kind.add().name = '__Stat_Ns_Kind__'

    kind_filter = datastore_helper.set_property_filter(
        query_pb2.Filter(), 'kind_name', PropertyFilter.EQUAL, unicode(kind))
    timestamp_filter = datastore_helper.set_property_filter(
        query_pb2.Filter(), 'timestamp', PropertyFilter.EQUAL,
        latest_timestamp)

    datastore_helper.set_composite_filter(
        kind_stat_query.filter, CompositeFilter.AND, kind_filter,
        timestamp_filter)

    return kind_stat_query
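# Usage sketch (assumption, not from the source): the two stats helpers are
# meant to be chained -- run make_latest_timestamp_query first, extract the
# `timestamp` property of the single returned entity, then pass it to
# make_kind_stats_query. `run_single_entity` is a hypothetical callable that
# executes a query and returns its first result entity; the property access
# assumes datastore_helper is googledatastore.helper.
def lookup_kind_stats(namespace, kind, run_single_entity):
    latest = run_single_entity(make_latest_timestamp_query(namespace))
    latest_timestamp = datastore_helper.get_value(latest.properties['timestamp'])
    return run_single_entity(
        make_kind_stats_query(namespace, kind, latest_timestamp))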
def _create_scatter_query(query, num_splits):
    """Creates a scatter query from the given user query."""
    scatter_query = query_pb2.Query()
    for kind in query.kind:
        scatter_kind = scatter_query.kind.add()
        scatter_kind.CopyFrom(kind)

    # Ascending order by the scatter property.
    datastore_helper.add_property_orders(scatter_query, SCATTER_PROPERTY_NAME)

    # There is a split containing entities before and after each scatter
    # entity:
    #   ||---*------*------*------*------*------*------*---||  * = scatter entity
    # If we represent each split as a region before a scatter entity, there is
    # an extra region following the last scatter point. Thus, we do not need
    # the scatter entity for the last region.
    scatter_query.limit.value = (num_splits - 1) * KEYS_PER_SPLIT
    datastore_helper.add_projection(scatter_query, KEY_PROPERTY_NAME)
    return scatter_query
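# Worked sketch (assumption: KEYS_PER_SPLIT is 32, as in the Beam splitter):
# for num_splits = 4 the scatter query requests (4 - 1) * 32 = 96 scatter
# keys; roughly every 32nd returned key is kept as a split boundary, giving
# num_splits - 1 boundaries and therefore num_splits key ranges for
# _create_split above.
num_splits = 4
keys_per_split = 32  # stand-in for KEYS_PER_SPLIT
scatter_limit = (num_splits - 1) * keys_per_split  # == 96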
def setUp(self):
    self._mock_datastore = MagicMock()
    self._query = query_pb2.Query()
    self._query.kind.add().name = self._KIND
def setUp(self):
    self._mock_datastore = MagicMock()
    self._query = query_pb2.Query()
    self._query.kind.add().name = 'dummy_kind'
    patch_retry(self, helper)
def test_get_splits_query_with_offset(self):
    query = query_pb2.Query()
    query.kind.add()
    query.offset = 10
    self.assertRaises(ValueError, query_splitter.get_splits, None, query, 2)
def test_get_splits_query_with_multiple_kinds(self):
    query = query_pb2.Query()
    query.kind.add()
    query.kind.add()
    self.assertRaises(ValueError, query_splitter.get_splits, None, query, 4)