def check_DatastoreWriteFn(self, num_entities, use_fixed_batch_size=False):
  """A helper function to test _DatastoreWriteFn."""
  with patch.object(helper, 'get_client', return_value=self._mock_client):
    entities = helper.create_entities(num_entities)
    expected_entities = [entity.to_client_entity() for entity in entities]

    # Infer project from write fn project arg.
    if num_entities:
      key = Key(['k1', 1234], project=self._PROJECT)
      expected_key = key.to_client_key()
      key.project = None
      entities[0].key = key
      expected_entities[0].key = expected_key

    all_batch_entities = []
    commit_count = [0]
    self._mock_client.batch.side_effect = (
        lambda: FakeBatch(
            all_batch_items=all_batch_entities, commit_count=commit_count))

    datastore_write_fn = WriteToDatastore._DatastoreWriteFn(self._PROJECT)

    datastore_write_fn.start_bundle()
    for entity in entities:
      datastore_write_fn.process(entity)
    datastore_write_fn.finish_bundle()

    self.assertListEqual([e.key for e in all_batch_entities],
                         [e.key for e in expected_entities])
    batch_count = math.ceil(num_entities / util.WRITE_BATCH_MAX_SIZE)
    self.assertLessEqual(batch_count, commit_count[0])
def test_DatastoreDeleteFn(self):
  with patch.object(helper, 'get_client', return_value=self._mock_client):
    keys = [entity.key for entity in helper.create_entities(10)]
    expected_keys = [key.to_client_key() for key in keys]

    # Infer project from delete fn project arg.
    key = Key(['k1', 1234], project=self._PROJECT)
    expected_key = key.to_client_key()
    key.project = None
    keys.append(key)
    expected_keys.append(expected_key)

    all_batch_keys = []
    self._mock_client.batch.side_effect = (
        lambda: FakeBatch(all_batch_items=all_batch_keys))

    datastore_delete_fn = DeleteFromDatastore._DatastoreDeleteFn(self._PROJECT)

    datastore_delete_fn.start_bundle()
    for key in keys:
      datastore_delete_fn.process(key)
    datastore_delete_fn.finish_bundle()

    self.assertListEqual(all_batch_keys, expected_keys)
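# A minimal sketch of the FakeBatch test double assumed by the two tests
# above; the real helper may differ. It records every put/delete into the
# shared list and counts commits via a one-element mutable list, mimicking
# the google.cloud.datastore.Batch interface the DoFns call into.
class FakeBatch(object):
  def __init__(self, all_batch_items=None, commit_count=None):
    self._all_batch_items = all_batch_items
    self._commit_count = commit_count

  def begin(self):
    pass  # No-op; a real batch would start buffering mutations here.

  def put(self, entity):
    # Record the entity that a write would have mutated.
    self._all_batch_items.append(entity)

  def delete(self, key):
    # Record the key that a delete would have mutated.
    self._all_batch_items.append(key)

  def commit(self):
    # Count commits so tests can assert on batching behavior.
    if self._commit_count is not None:
      self._commit_count[0] += 1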
def testQuery(self):
  filters = [('property_name', '=', 'value')]
  projection = ['f1', 'f2']
  order = projection
  distinct_on = projection
  ancestor_key = Key(['kind', 'id'], project=self._PROJECT)
  q = Query(
      kind='kind',
      project=self._PROJECT,
      namespace=self._NAMESPACE,
      ancestor=ancestor_key,
      filters=filters,
      projection=projection,
      order=order,
      distinct_on=distinct_on)
  cq = q._to_client_query(self._test_client)
  self.assertEqual(self._PROJECT, cq.project)
  self.assertEqual(self._NAMESPACE, cq.namespace)
  self.assertEqual('kind', cq.kind)
  self.assertEqual(ancestor_key.to_client_key(), cq.ancestor)
  self.assertEqual(filters, cq.filters)
  self.assertEqual(projection, cq.projection)
  self.assertEqual(order, cq.order)
  self.assertEqual(distinct_on, cq.distinct_on)
  logging.info('query: %s', q)  # Test __repr__()
def make_entity(self, content):
  # Pass project and namespace as keyword arguments; passing them
  # positionally would bind to the wrong parameters of Key.
  ancestor_key = Key([self._kind, self._ancestor],
                     namespace=self._namespace,
                     project=self._project)
  # Namespace and project are inherited from parent key.
  key = Key([self._kind, str(uuid.uuid4())], parent=ancestor_key)
  entity = Entity(key)
  entity.set_properties({'content': content})
  return entity
def testEntityFromClientEntity(self):
  k = Key(['kind', 1234], project=self._PROJECT)
  exclude_from_indexes = ('efi1', 'efi2')
  e = Entity(k, exclude_from_indexes=exclude_from_indexes)
  ref = Key(['kind2', 1235])
  e.set_properties({'efi1': 'value', 'property': 'value', 'ref': ref})
  efc = Entity.from_client_entity(e.to_client_entity())
  self.assertEqual(e, efc)
def testEntityToClientEntity(self):
  k = Key(['kind', 1234], project=self._PROJECT)
  kc = k.to_client_key()
  exclude_from_indexes = ('efi1', 'efi2')
  e = Entity(k, exclude_from_indexes=exclude_from_indexes)
  e.set_properties({'efi1': 'value', 'property': 'value'})
  ec = e.to_client_entity()
  self.assertEqual(kc, ec.key)
  self.assertSetEqual(set(exclude_from_indexes), ec.exclude_from_indexes)
  self.assertEqual('kind', ec.kind)
  self.assertEqual(1234, ec.id)
def to_entity(line):
  # Each line: id,president,startYear,endYear,party,homeState,dateOfBirth
  fields = line.split(',')
  id = int(fields[0])
  key = Key([kind, id])
  entity = Entity(key)
  president = fields[1]
  names = president.split(' ')
  entity.set_properties({
      'id': id,
      'firstName': names[0],
      'lastName': names[1],
      'startYear': int(fields[2]),
      'endYear': int(fields[3]),
      'party': fields[4],
      'homeState': fields[5],
      'dateOfBirth': datetime.strptime(fields[6], '%Y-%m-%d')
  })
  return entity
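# Illustrative input for to_entity above, matching the documented CSV layout.
# The sample values are historical facts used only as example data; `kind` is
# assumed to be in scope, as in the function itself.
line = '16,Abraham Lincoln,1861,1865,Republican,Illinois,1809-02-12'
entity = to_entity(line)  # Entity keyed by id=16, firstName='Abraham', etc.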
def make_entity(self, content):
  """Create entity from given string."""
  key = Key([self._kind, hashlib.sha1(content.encode('utf-8')).hexdigest()],
            parent=self._parent_key)
  entity = Entity(key)
  entity.set_properties({'content': str(content)})
  return entity
def process(self, element):
  key = Key(['natality-guid', element['guid']])
  entity = Entity(key)
  entity.set_properties({
      'weight': element['weight'],
      'time': element['time']
  })
  yield entity
def make_ancestor_query(project, kind, namespace, ancestor):
  """Creates a Cloud Datastore ancestor query.

  The returned query will fetch all the entities that have the parent key name
  set to the given `ancestor`.
  """
  ancestor_key = Key([kind, ancestor], project=project, namespace=namespace)
  return Query(kind, project, namespace, ancestor_key)
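# A minimal usage sketch, assuming a pipeline `p` and project/kind/namespace/
# ancestor values in scope (mirroring how queries feed ReadFromDatastore in
# the write/read pipeline further below):
query = make_ancestor_query(project, kind, namespace, ancestor)
entities = p | 'Read ancestors' >> ReadFromDatastore(query)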
def testKeyFromClientKey(self):
  k = Key(['k1', 1234], project=self._PROJECT, namespace=self._NAMESPACE)
  kfc = Key.from_client_key(k.to_client_key())
  self.assertEqual(k, kfc)

  k2 = Key(['k2', 'adsf'], parent=k)
  kfc2 = Key.from_client_key(k2.to_client_key())
  # Converting a key with a parent to a client_key and back loses the parent:
  self.assertNotEqual(k2, kfc2)
  self.assertTupleEqual(('k1', 1234, 'k2', 'adsf'), kfc2.path_elements)
  self.assertIsNone(kfc2.parent)

  kfc3 = Key.from_client_key(kfc2.to_client_key())
  self.assertEqual(kfc2, kfc3)
def testEntityToClientEntity(self):
  # Test conversion from Beam type to client type.
  k = Key(['kind', 1234], project=self._PROJECT)
  kc = k.to_client_key()
  exclude_from_indexes = ('datetime', 'key')
  e = Entity(k, exclude_from_indexes=exclude_from_indexes)
  properties = {
      'datetime': datetime.datetime.utcnow(),
      'key_ref': Key(['kind2', 1235]),
      'bool': True,
      'float': 1.21,
      'int': 1337,
      'unicode': 'text',
      'bytes': b'bytes',
      'geopoint': GeoPoint(0.123, 0.456),
      'none': None,
      'list': [1, 2, 3],
      'entity': Entity(Key(['kind', 111])),
      'dict': {'property': 5},
  }
  e.set_properties(properties)
  ec = e.to_client_entity()
  self.assertEqual(kc, ec.key)
  self.assertSetEqual(set(exclude_from_indexes), ec.exclude_from_indexes)
  self.assertEqual('kind', ec.kind)
  self.assertEqual(1234, ec.id)
  for name, unconverted in properties.items():
    converted = ec[name]
    if name == 'key_ref':
      self.assertNotIsInstance(converted, Key)
      self._assert_keys_equal(unconverted, converted, self._PROJECT)
    elif name == 'entity':
      self.assertNotIsInstance(converted, Entity)
      self.assertNotIsInstance(converted.key, Key)
      self._assert_keys_equal(unconverted.key, converted.key, self._PROJECT)
    else:
      self.assertEqual(unconverted, converted)

  # Test reverse conversion.
  entity_from_client_entity = Entity.from_client_entity(ec)
  self.assertEqual(e, entity_from_client_entity)
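# A hedged sketch of the _assert_keys_equal helper the test above relies on
# (not shown in this section); the real helper may differ. It checks that a
# Beam key without a project matches a client key whose project was inferred.
def _assert_keys_equal(self, beam_key, client_key, expected_project):
  self.assertEqual(beam_key.path_elements[0], client_key.kind)
  self.assertEqual(beam_key.path_elements[1], client_key.id)
  self.assertEqual(expected_project, client_key.project)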
def get_ndb_key_from_beam_key(
    beam_key: beam_datastore_types.Key) -> datastore_services.Key:
  """Returns an NDB key equivalent to the given Apache Beam key.

  Args:
    beam_key: beam_datastore_types.Key. The Apache Beam key.

  Returns:
    datastore_services.Key. The NDB key.
  """
  return datastore_services.Key._from_ds_key(beam_key.to_client_key())  # pylint: disable=protected-access
def testKeyToClientKey(self):
  k = Key(['kind1', 'parent'],
          project=self._PROJECT,
          namespace=self._NAMESPACE)
  ck = k.to_client_key()
  self.assertEqual(self._PROJECT, ck.project)
  self.assertEqual(self._NAMESPACE, ck.namespace)
  self.assertEqual(('kind1', 'parent'), ck.flat_path)
  self.assertEqual('kind1', ck.kind)
  self.assertEqual('parent', ck.id_or_name)
  self.assertEqual(None, ck.parent)

  k2 = Key(['kind2', 1234], parent=k)
  ck2 = k2.to_client_key()
  self.assertEqual(self._PROJECT, ck2.project)
  self.assertEqual(self._NAMESPACE, ck2.namespace)
  self.assertEqual(('kind1', 'parent', 'kind2', 1234), ck2.flat_path)
  self.assertEqual('kind2', ck2.kind)
  self.assertEqual(1234, ck2.id_or_name)
  self.assertEqual(ck, ck2.parent)
def run(argv=None):
  """Main entry point."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--kind', dest='kind', default='writereadtest', help='Datastore Kind')
  parser.add_argument(
      '--num_entities',
      dest='num_entities',
      type=int,
      required=True,
      help='Number of entities to write')
  parser.add_argument(
      '--limit',
      dest='limit',
      type=int,
      help='Limit on the number of entities to read back')
  known_args, pipeline_args = parser.parse_known_args(argv)
  pipeline_options = PipelineOptions(pipeline_args)
  gcloud_options = pipeline_options.view_as(GoogleCloudOptions)
  job_name = gcloud_options.job_name
  kind = known_args.kind
  num_entities = known_args.num_entities
  project = gcloud_options.project

  # Pipeline 1: Create and write the specified number of Entities to the
  # Cloud Datastore.
  ancestor_key = Key([kind, str(uuid.uuid4())], project=project)
  _LOGGER.info('Writing %s entities to %s', num_entities, project)
  p = new_pipeline_with_job_name(pipeline_options, job_name, '-write')
  _ = (
      p
      | 'Input' >> beam.Create(list(range(num_entities)))
      | 'To String' >> beam.Map(str)
      | 'To Entity' >> beam.Map(EntityWrapper(kind, ancestor_key).make_entity)
      | 'Write to Datastore' >> WriteToDatastore(project))
  p.run()

  query = Query(kind=kind, project=project, ancestor=ancestor_key)

  # Optional Pipeline 2: If a read limit was provided, read with that limit
  # and confirm that only the expected number of entities was read.
  if known_args.limit is not None:
    _LOGGER.info(
        'Querying a limited set of %s entities and verifying count.',
        known_args.limit)
    p = new_pipeline_with_job_name(pipeline_options, job_name, '-verify-limit')
    query.limit = known_args.limit
    entities = p | 'read from datastore' >> ReadFromDatastore(query)
    assert_that(
        entities | beam.combiners.Count.Globally(),
        equal_to([known_args.limit]))
    p.run()
    query.limit = None

  # Pipeline 3: Query the written Entities and verify result.
  _LOGGER.info('Querying entities, asserting they match.')
  p = new_pipeline_with_job_name(pipeline_options, job_name, '-verify')
  entities = p | 'read from datastore' >> ReadFromDatastore(query)
  assert_that(
      entities | beam.combiners.Count.Globally(), equal_to([num_entities]))
  p.run()

  # Pipeline 4: Delete Entities.
  _LOGGER.info('Deleting entities.')
  p = new_pipeline_with_job_name(pipeline_options, job_name, '-delete')
  entities = p | 'read from datastore' >> ReadFromDatastore(query)
  _ = (
      entities
      | 'To Keys' >> beam.Map(lambda entity: entity.key)
      | 'delete entities' >> DeleteFromDatastore(project))
  p.run()

  # Pipeline 5: Query the written Entities, verify no results.
  _LOGGER.info(
      'Querying for the entities to make sure there are none present.')
  p = new_pipeline_with_job_name(pipeline_options, job_name, '-verify-deleted')
  entities = p | 'read from datastore' >> ReadFromDatastore(query)
  assert_that(entities | beam.combiners.Count.Globally(), equal_to([0]))
  p.run()
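# A hedged sketch of the new_pipeline_with_job_name helper assumed above; the
# real implementation may differ. It derives a distinct job name per
# sub-pipeline so the five runs can be told apart in the service.
def new_pipeline_with_job_name(pipeline_options, job_name, suffix):
  gcloud_options = pipeline_options.view_as(GoogleCloudOptions)
  if job_name:
    # Runners without job names (e.g. DirectRunner) skip this step.
    gcloud_options.job_name = job_name + suffix
  return beam.Pipeline(options=pipeline_options)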
def testKeyToClientKeyMissingProject(self):
  k = Key(['k1', 1234], namespace=self._NAMESPACE)
  with self.assertRaisesRegex(ValueError, r'project'):
    _ = Key.from_client_key(k.to_client_key())
def testKeyFromClientKeyNoNamespace(self):
  k = Key(['k1', 1234], project=self._PROJECT)
  ck = k.to_client_key()
  self.assertEqual(None, ck.namespace)  # Test that getter doesn't croak.
  kfc = Key.from_client_key(ck)
  self.assertEqual(k, kfc)
def to_entity(content):
  key = Key([kind, str(uuid.uuid4())])
  entity = Entity(key)
  entity.set_properties({'content': content})
  return entity
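# A minimal usage sketch, assuming a pipeline `p`, a `project`, and `kind` in
# scope; it mirrors the write stage of the pipeline above.
_ = (
    p
    | 'Create content' >> beam.Create(['line one', 'line two'])
    | 'To Entity' >> beam.Map(to_entity)
    | 'Write to Datastore' >> WriteToDatastore(project))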