def __iter__(self):
    """Generate the low-level entities covered by the key ranges.

    Entities are fetched in batches of at most ``self._batch_size``. Each
    time an entity is produced the current key range is advanced past its
    key, so consuming the iterator moves the query range forward.

    Yields:
      The next entity returned by the datastore query.
    """
    kind = util.get_short_name(self._entity_kind)
    while self._current_key_range is not None:
        # Drain the current range batch by batch. An empty batch ends the
        # drain; _advance_key_range() is then expected to install the next
        # range (or None when nothing is left) -- defined elsewhere.
        while True:
            batch = self._current_key_range.make_ascending_datastore_query(
                kind).Get(limit=self._batch_size)
            if not batch:
                self._advance_key_range()
                break
            for entity in batch:
                self._current_key_range.advance(entity.key())
                yield entity
def _iter_key_range(self, k_range):
    """Yield ``(key, entity)`` pairs for the entities inside ``k_range``.

    Args:
      k_range: key range object used to build an ascending datastore query
        for this iterator's entity kind.

    Yields:
      A 2-tuple of the entity's key and the entity itself.
    """
    kind = util.get_short_name(self._entity_kind)
    ascending_query = k_range.make_ascending_datastore_query(kind)
    run_options = datastore_query.QueryOptions(batch_size=self._batch_size)
    for entity in ascending_query.Run(config=run_options):
        yield entity.key(), entity
def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
                                shard_count):
    """Return KeyRange objects. Helper for _split_input_from_params.

    Split points are sampled by ordering a keys-only query on the
    ``__scatter__`` property and, if more keys come back than are needed,
    downsampling them to ``shard_count - 1`` points.

    Args:
      cls: class providing ``_OVERSAMPLING_FACTOR``.
      app: application id passed to the query and to every KeyRange.
      namespace: namespace to split.
      entity_kind_name: entity kind name; reduced to its short name for the
        query.
      shard_count: desired number of shards.

    Returns:
      A list of KeyRange objects. A single unbounded range is returned when
      ``shard_count`` is 1 or when the scatter query yields no keys.
    """
    raw_entity_kind = util.get_short_name(entity_kind_name)

    if shard_count == 1:
        # With one shard we don't need to calculate any splitpoints at all.
        return [key_range.KeyRange(namespace=namespace, _app=app)]

    # we use datastore.Query instead of ext.db.Query here, because we can't
    # erase ordering on db.Query once we set it.
    ds_query = datastore.Query(kind=raw_entity_kind,
                               namespace=namespace,
                               _app=app,
                               keys_only=True)
    ds_query.Order("__scatter__")
    random_keys = ds_query.Get(shard_count * cls._OVERSAMPLING_FACTOR)

    if not random_keys:
        # This might mean that there are no entities with scatter property
        # or there are no entities at all.
        return [key_range.KeyRange(namespace=namespace, _app=app)]

    random_keys.sort()

    # pick shard_count - 1 points to generate shard_count splits
    split_points_count = shard_count - 1
    if len(random_keys) > split_points_count:
        # downsample. Floor division keeps the index an int regardless of
        # the interpreter's division semantics.
        sample_size = len(random_keys)
        random_keys = [random_keys[sample_size * i // split_points_count]
                       for i in range(split_points_count)]

    # Every range carries the same app id as the single-range results above.
    key_ranges = [
        key_range.KeyRange(key_start=None,
                           key_end=random_keys[0],
                           direction=key_range.KeyRange.ASC,
                           include_start=False,
                           include_end=False,
                           namespace=namespace,
                           _app=app)
    ]

    for start_key, end_key in zip(random_keys, random_keys[1:]):
        key_ranges.append(
            key_range.KeyRange(key_start=start_key,
                               key_end=end_key,
                               direction=key_range.KeyRange.ASC,
                               include_start=True,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

    key_ranges.append(
        key_range.KeyRange(key_start=random_keys[-1],
                           key_end=None,
                           direction=key_range.KeyRange.ASC,
                           include_start=True,
                           include_end=False,
                           namespace=namespace,
                           _app=app))

    return key_ranges
def _split_input_from_params(cls, app, entity_kind_name, params, shard_count):
    """Return input reader objects. Helper for split_input.

    The visible part of this helper locates the first and last entity keys
    of the kind. If the descending-key query raises ``db.NeedIndexError``,
    the end key is guessed with ``KeyRange.guess_end_key`` instead.

    Args:
      cls: class this helper belongs to (unused in the visible portion).
      app: application id passed to the datastore query.
      entity_kind_name: entity kind name; reduced to its short name for the
        query.
      params: mapper parameters (unused in the visible portion).
      shard_count: requested number of shards (unused in the visible
        portion).

    Returns:
      An empty list when the kind has no entities.
    """
    raw_entity_kind = util.get_short_name(entity_kind_name)

    # we use datastore.Query instead of ext.db.Query here, because we can't
    # erase ordering on db.Query once we set it.
    ds_query = datastore.Query(kind=raw_entity_kind, _app=app, keys_only=True)
    ds_query.Order("__key__")
    first_entity_key_list = ds_query.Get(1)
    if not first_entity_key_list:
        return []
    first_entity_key = first_entity_key_list[0]

    ds_query.Order(("__key__", datastore.Query.DESCENDING))
    try:
        last_entity_key, = ds_query.Get(1)
    except db.NeedIndexError as e:
        # TODO(user): Show this error in the worker log, not the app logs.
        logging.warning("Cannot create accurate approximation of keyspace, "
                        "guessing instead. Please address this problem: %s", e)
        # TODO(user): Use a key-end hint from the user input parameters
        # in this case, in the event the user has a good way of figuring out
        # the range of the keyspace.
        last_entity_key = key_range.KeyRange.guess_end_key(raw_entity_kind,
                                                           first_entity_key)
    # NOTE(review): the visible source ends here; whatever consumes
    # first_entity_key / last_entity_key is not shown in this snippet.
def _split_input_from_params(cls, app, entity_kind_name, params, shard_count):
    """Return input reader objects. Helper for split_input.

    In the portion visible here, the helper finds the smallest and largest
    keys of the kind using keys-only queries ordered on ``__key__``. When
    the descending query fails with ``db.NeedIndexError`` a warning is
    logged and the end key is approximated via ``KeyRange.guess_end_key``.

    Args:
      cls: owning class (not referenced in the visible portion).
      app: application id handed to ``datastore.Query``.
      entity_kind_name: entity kind name, shortened with
        ``util.get_short_name`` before querying.
      params: mapper parameters (not referenced in the visible portion).
      shard_count: requested shard count (not referenced in the visible
        portion).

    Returns:
      ``[]`` if no entity of the kind exists.
    """
    raw_entity_kind = util.get_short_name(entity_kind_name)

    # we use datastore.Query instead of ext.db.Query here, because we can't
    # erase ordering on db.Query once we set it.
    ds_query = datastore.Query(kind=raw_entity_kind, _app=app, keys_only=True)
    ds_query.Order("__key__")
    first_entity_key_list = ds_query.Get(1)
    if not first_entity_key_list:
        return []
    first_entity_key = first_entity_key_list[0]

    ds_query.Order(("__key__", datastore.Query.DESCENDING))
    try:
        last_entity_key, = ds_query.Get(1)
    except db.NeedIndexError as e:
        # TODO(user): Show this error in the worker log, not the app logs.
        logging.warning(
            "Cannot create accurate approximation of keyspace, "
            "guessing instead. Please address this problem: %s", e)
        # TODO(user): Use a key-end hint from the user input parameters
        # in this case, in the event the user has a good way of figuring out
        # the range of the keyspace.
        last_entity_key = key_range.KeyRange.guess_end_key(
            raw_entity_kind, first_entity_key)
    # NOTE(review): nothing past this point is visible in the source
    # snippet, so the use of first_entity_key / last_entity_key is not shown.
def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
                                shard_count):
    """Return KeyRange objects. Helper for _split_input_from_params.

    Candidate split points come from a keys-only query ordered on
    ``__scatter__``; up to ``shard_count * cls._OVERSAMPLING_FACTOR`` keys
    are fetched, sorted, and thinned to ``shard_count - 1`` split points.

    Args:
      cls: class providing ``_OVERSAMPLING_FACTOR``.
      app: application id used for the query and for every KeyRange.
      namespace: namespace whose keyspace is being split.
      entity_kind_name: entity kind name; its short name is queried.
      shard_count: desired number of shards.

    Returns:
      A list of ascending KeyRange objects covering the namespace. It holds
      one unbounded range when ``shard_count`` is 1 or no scatter keys are
      found.
    """
    raw_entity_kind = util.get_short_name(entity_kind_name)

    if shard_count == 1:
        # With one shard we don't need to calculate any splitpoints at all.
        return [key_range.KeyRange(namespace=namespace, _app=app)]

    # we use datastore.Query instead of ext.db.Query here, because we can't
    # erase ordering on db.Query once we set it.
    ds_query = datastore.Query(kind=raw_entity_kind,
                               namespace=namespace,
                               _app=app,
                               keys_only=True)
    ds_query.Order("__scatter__")
    random_keys = ds_query.Get(shard_count * cls._OVERSAMPLING_FACTOR)

    if not random_keys:
        # This might mean that there are no entities with scatter property
        # or there are no entities at all.
        return [key_range.KeyRange(namespace=namespace, _app=app)]

    random_keys.sort()

    # pick shard_count - 1 points to generate shard_count splits
    split_points_count = shard_count - 1
    if len(random_keys) > split_points_count:
        # downsample; `//` guarantees an integer index even where `/` would
        # perform true division.
        total = len(random_keys)
        random_keys = [random_keys[total * i // split_points_count]
                       for i in range(split_points_count)]

    def _ascending_range(start, end, include_start):
        # All split ranges share direction, namespace and app id; only the
        # bounds and the start inclusivity differ.
        return key_range.KeyRange(
            key_start=start,
            key_end=end,
            direction=key_range.KeyRange.ASC,
            include_start=include_start,
            include_end=False,
            namespace=namespace,
            _app=app)

    key_ranges = [_ascending_range(None, random_keys[0], False)]
    for i in range(len(random_keys) - 1):
        key_ranges.append(
            _ascending_range(random_keys[i], random_keys[i + 1], True))
    key_ranges.append(_ascending_range(random_keys[-1], None, True))

    return key_ranges
def _get_raw_entity_kind(cls, model_classpath):
    """Resolve the datastore kind for the model named by ``model_classpath``.

    Args:
      cls: owning class (unused).
      model_classpath: dotted path resolved with ``util.for_name``.

    Returns:
      ``kind()`` for db models, ``_get_kind()`` for ndb models, otherwise
      the short name of ``model_classpath``.
    """
    entity_type = util.for_name(model_classpath)

    # A model *class* is not an instance of db.Model, so isinstance() alone
    # never matches it; also accept subclasses. The isinstance(..., type)
    # guard keeps issubclass() from raising on non-class objects.
    is_db_model = isinstance(entity_type, db.Model) or (
        isinstance(entity_type, type) and issubclass(entity_type, db.Model))
    if is_db_model:
        return entity_type.kind()

    if isinstance(entity_type, (ndb.Model, ndb.MetaModel)):
        # pylint: disable=protected-access
        return entity_type._get_kind()

    return util.get_short_name(model_classpath)
def _create_iter(self, iter_cls, entity_kind):
    """Build an ``iter_cls`` over key names "0" to "999" of ``entity_kind``.

    Args:
      iter_cls: iterator class, called with a KeyRange and a QuerySpec.
      entity_kind: model class path; its short name is the datastore kind.

    Returns:
      The object produced by ``iter_cls(key_range, query_spec)``.
    """
    kind = util.get_short_name(entity_kind)
    ns = self.namespace

    # Both bounds are inclusive.
    first_key = db.Key.from_path(kind, "0", namespace=ns)
    last_key = db.Key.from_path(kind, "999", namespace=ns)
    bounds = key_range.KeyRange(first_key,
                                last_key,
                                include_start=True,
                                include_end=True,
                                namespace=ns)

    spec = model.QuerySpec(entity_kind=kind,
                           batch_size=10,
                           filters=self.filters,
                           model_class_path=entity_kind)
    return iter_cls(bounds, spec)
def _create_iter(self, iter_cls, entity_kind):
    """Build a key-ranges iterator spanning every namespace under test.

    Args:
      iter_cls: iterator class forwarded to the range iterator factory.
      entity_kind: model class path; its short name is the datastore kind.

    Returns:
      The iterator created by
      ``RangeIteratorFactory.create_key_ranges_iterator``.
    """
    # One unbounded KeyRange per namespace, combined by the factory.
    per_namespace = []
    for ns in self.namespaces:
        per_namespace.append(key_range.KeyRange(namespace=ns))
    combined = key_ranges.KeyRangesFactory.create_from_list(per_namespace)

    spec = model.QuerySpec(entity_kind=util.get_short_name(entity_kind),
                           batch_size=10,
                           filters=self.filters,
                           model_class_path=entity_kind)
    return db_iters.RangeIteratorFactory.create_key_ranges_iterator(
        combined, spec, iter_cls)
def _create_iter(self, iter_cls, entity_kind):
    """Create ``iter_cls`` for an inclusive "0".."999" key-name range.

    Args:
      iter_cls: iterator class, invoked as ``iter_cls(krange, query_spec)``.
      entity_kind: model class path; the kind is its short name.

    Returns:
      The constructed iterator.
    """
    short_kind = util.get_short_name(entity_kind)
    namespace = self.namespace

    start_key = db.Key.from_path(short_kind, "0", namespace=namespace)
    end_key = db.Key.from_path(short_kind, "999", namespace=namespace)
    inclusive_range = key_range.KeyRange(
        start_key,
        end_key,
        include_start=True,
        include_end=True,
        namespace=namespace)

    query_spec = model.QuerySpec(
        entity_kind=short_kind,
        batch_size=10,
        filters=self.filters,
        model_class_path=entity_kind)
    return iter_cls(inclusive_range, query_spec)
def _create_iter(self, iter_cls, entity_kind):
    """Create a key-ranges iterator with one range per test namespace.

    Args:
      iter_cls: iterator class handed to the range iterator factory.
      entity_kind: model class path; the kind is its short name.

    Returns:
      The result of ``RangeIteratorFactory.create_key_ranges_iterator``.
    """
    namespace_ranges = [
        key_range.KeyRange(namespace=namespace)
        for namespace in self.namespaces
    ]
    all_ranges = key_ranges.KeyRangesFactory.create_from_list(
        namespace_ranges)

    query_spec = model.QuerySpec(
        entity_kind=util.get_short_name(entity_kind),
        batch_size=10,
        filters=self.filters,
        model_class_path=entity_kind)

    return db_iters.RangeIteratorFactory.create_key_ranges_iterator(
        all_ranges, query_spec, iter_cls)
def _create_iter(self, entity_kind):
    """Build a property-range iterator for ``entity_kind``.

    The namespace range runs from the first to the last entry of
    ``self.namespaces``.

    Args:
      entity_kind: model class path; its short name is the datastore kind.

    Returns:
      The iterator created by
      ``RangeIteratorFactory.create_property_range_iterator``.
    """
    spec = model.QuerySpec(entity_kind=util.get_short_name(entity_kind),
                           batch_size=10,
                           filters=self.filters,
                           model_class_path=entity_kind)
    prop_range = property_range.PropertyRange(self.filters, entity_kind)

    first_ns = self.namespaces[0]
    last_ns = self.namespaces[-1]
    namespaces = namespace_range.NamespaceRange(first_ns, last_ns)

    return db_iters.RangeIteratorFactory.create_property_range_iterator(
        prop_range, namespaces, spec)
def _create_iter(self, entity_kind):
    """Create a property-range iterator over the test namespaces.

    Args:
      entity_kind: model class path; the kind is its short name.

    Returns:
      The result of
      ``RangeIteratorFactory.create_property_range_iterator``.
    """
    query_spec = model.QuerySpec(
        entity_kind=util.get_short_name(entity_kind),
        batch_size=10,
        filters=self.filters,
        model_class_path=entity_kind)
    filter_range = property_range.PropertyRange(self.filters, entity_kind)
    # Namespace bounds are the first and last entries of self.namespaces.
    ns_bounds = namespace_range.NamespaceRange(
        self.namespaces[0], self.namespaces[-1])

    factory = db_iters.RangeIteratorFactory
    return factory.create_property_range_iterator(
        filter_range, ns_bounds, query_spec)
def __iter__(self):
    """Generate the keys covered by the key range.

    Keys are fetched in batches of at most ``self._batch_size`` with a
    keys-only query. The key range is advanced past each key as it is
    produced, so consuming the iterator moves the query range forward.

    Yields:
      The next key returned by the datastore query.
    """
    while True:
        kind = util.get_short_name(self._entity_kind)
        keys_query = self._key_range.make_ascending_datastore_query(
            kind, keys_only=True)
        batch = keys_query.Get(limit=self._batch_size)
        if not batch:
            # Nothing left in the range: end the generator.
            return
        for key in batch:
            self._key_range.advance(key)
            yield key
def testGetShortName(self):
    """get_short_name returns the text after the last dot, if any."""
    # assertEqual is used because the assertEquals alias is deprecated and
    # absent from newer unittest versions.
    self.assertEqual("blah", util.get_short_name("blah"))
    self.assertEqual("blah", util.get_short_name(".blah"))
    self.assertEqual("blah", util.get_short_name("__mmm__.blah"))
    self.assertEqual("blah", util.get_short_name("__mmm__.Krb.blah"))