Esempio n. 1
0
  def __iter__(self):
    """Yield low level entities from the current key range(s).

    Entities are fetched in batches of at most self._batch_size. The current
    key range is advanced past each entity right before it is yielded, so the
    range always describes the entries that were not consumed yet.

    Yields:
      the next entity returned by the ascending datastore query.
    """
    kind = util.get_short_name(self._entity_kind)
    # Iteration ends as soon as there is no current key range left.
    while self._current_key_range is not None:
      while True:
        batch = self._current_key_range.make_ascending_datastore_query(
            kind).Get(limit=self._batch_size)
        if batch:
          for entity in batch:
            self._current_key_range.advance(entity.key())
            yield entity
        else:
          # Empty batch: nothing more to read from this range. Let
          # _advance_key_range() update self._current_key_range and go back
          # to the outer check.
          self._advance_key_range()
          break
Esempio n. 2
0
    def __iter__(self):
        """Yield low level entities from the current key range(s).

        Entities are fetched in batches of at most self._batch_size. The
        current key range is advanced past each entity right before it is
        yielded, so the range always describes the entries that were not
        consumed yet.

        Yields:
          the next entity returned by the ascending datastore query.
        """
        kind = util.get_short_name(self._entity_kind)
        # Iteration ends as soon as there is no current key range left.
        while self._current_key_range is not None:
            while True:
                batch = (
                    self._current_key_range.make_ascending_datastore_query(
                        kind).Get(limit=self._batch_size))
                if batch:
                    for entity in batch:
                        self._current_key_range.advance(entity.key())
                        yield entity
                else:
                    # Empty batch: nothing more to read from this range. Let
                    # _advance_key_range() update self._current_key_range
                    # and go back to the outer check.
                    self._advance_key_range()
                    break
Esempio n. 3
0
 def _iter_key_range(self, k_range):
   """Yield (key, entity) pairs for the entities matched by k_range.

   Args:
     k_range: object providing make_ascending_datastore_query(kind).

   Yields:
     (entity.key(), entity) tuples, in the order the query returns them.
   """
   kind = util.get_short_name(self._entity_kind)
   ascending_query = k_range.make_ascending_datastore_query(kind)
   # self._batch_size is handed to the query as its batch_size option.
   options = datastore_query.QueryOptions(batch_size=self._batch_size)
   for entity in ascending_query.Run(config=options):
     yield entity.key(), entity
Esempio n. 4
0
    def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
                                    shard_count):
        """Return KeyRange objects. Helper for _split_input_from_params.

        Fetches keys ordered by the "__scatter__" property, sorts them and
        uses (a sample of) them as split points for ascending key ranges.

        Args:
          cls: class providing _OVERSAMPLING_FACTOR.
          app: value forwarded as _app to datastore.Query and KeyRange.
          namespace: namespace to query and to build the ranges for.
          entity_kind_name: entity kind name; shortened with
            util.get_short_name before querying.
          shard_count: requested number of shards.

        Returns:
          A list of key_range.KeyRange. A single unbounded range when
          shard_count is 1 or when the scatter query returns no keys;
          otherwise (number of split points + 1) adjacent ascending ranges.
        """

        raw_entity_kind = util.get_short_name(entity_kind_name)

        if shard_count == 1:
            # With one shard we don't need to calculate any splitpoints at all.
            return [key_range.KeyRange(namespace=namespace, _app=app)]

        # we use datastore.Query instead of ext.db.Query here, because we can't
        # erase ordering on db.Query once we set it.
        ds_query = datastore.Query(kind=raw_entity_kind,
                                   namespace=namespace,
                                   _app=app,
                                   keys_only=True)
        ds_query.Order("__scatter__")
        random_keys = ds_query.Get(shard_count * cls._OVERSAMPLING_FACTOR)
        if not random_keys:
            # This might mean that there are no entities with scatter property
            # or there are no entities at all.
            return [key_range.KeyRange(namespace=namespace, _app=app)]
        random_keys.sort()
        # pick shard_count - 1 points to generate shard_count splits
        split_points_count = shard_count - 1
        if len(random_keys) > split_points_count:
            # downsample; floor division keeps the index an integer regardless
            # of whether true division is in effect.
            random_keys = [
                random_keys[len(random_keys) * i // split_points_count]
                for i in range(split_points_count)
            ]

        key_ranges = []

        # Everything before the first split point. _app is passed on every
        # range so the split ranges target the same app as the query above.
        key_ranges.append(
            key_range.KeyRange(key_start=None,
                               key_end=random_keys[0],
                               direction=key_range.KeyRange.ASC,
                               include_start=False,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

        # Half-open ranges [split_i, split_i+1) between consecutive points.
        for i in range(0, len(random_keys) - 1):
            key_ranges.append(
                key_range.KeyRange(key_start=random_keys[i],
                                   key_end=random_keys[i + 1],
                                   direction=key_range.KeyRange.ASC,
                                   include_start=True,
                                   include_end=False,
                                   namespace=namespace,
                                   _app=app))

        # Everything from the last split point onwards.
        key_ranges.append(
            key_range.KeyRange(key_start=random_keys[-1],
                               key_end=None,
                               direction=key_range.KeyRange.ASC,
                               include_start=True,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

        return key_ranges
Esempio n. 5
0
  def _split_input_from_params(cls, app, entity_kind_name,
                               params, shard_count):
    """Return input reader objects. Helper for split_input.

    The visible part of this helper locates the first and last key of the
    entity kind: one keys-only query ordered by ascending "__key__" and one
    ordered by descending "__key__". If the descending query raises
    db.NeedIndexError, the end key is guessed with
    key_range.KeyRange.guess_end_key.

    Args:
      cls: the class this helper is invoked on.
      app: value forwarded as _app to datastore.Query.
      entity_kind_name: entity kind name; shortened with
        util.get_short_name before querying.
      params: not used in the visible part of this helper.
      shard_count: not used in the visible part of this helper.

    Returns:
      An empty list when the kind has no entities.
    """

    raw_entity_kind = util.get_short_name(entity_kind_name)

    # we use datastore.Query instead of ext.db.Query here, because we can't
    # erase ordering on db.Query once we set it.
    ds_query = datastore.Query(kind=raw_entity_kind, _app=app, keys_only=True)
    ds_query.Order("__key__")
    first_entity_key_list = ds_query.Get(1)
    if not first_entity_key_list:
      return []
    first_entity_key = first_entity_key_list[0]
    ds_query.Order(("__key__", datastore.Query.DESCENDING))
    try:
      # Single-element unpacking: expects exactly one key back.
      last_entity_key, = ds_query.Get(1)
    except db.NeedIndexError as e:
      # TODO(user): Show this error in the worker log, not the app logs.
      logging.warning("Cannot create accurate approximation of keyspace, "
                      "guessing instead. Please address this problem: %s", e)
      # TODO(user): Use a key-end hint from the user input parameters
      # in this case, in the event the user has a good way of figuring out
      # the range of the keyspace.
      last_entity_key = key_range.KeyRange.guess_end_key(raw_entity_kind,
                                                         first_entity_key)
Esempio n. 6
0
    def _split_input_from_params(cls, app, entity_kind_name, params,
                                 shard_count):
        """Return input reader objects. Helper for split_input.

        The visible part of this helper locates the first and last key of
        the entity kind: one keys-only query ordered by ascending "__key__"
        and one ordered by descending "__key__". If the descending query
        raises db.NeedIndexError, the end key is guessed with
        key_range.KeyRange.guess_end_key.

        Args:
          cls: the class this helper is invoked on.
          app: value forwarded as _app to datastore.Query.
          entity_kind_name: entity kind name; shortened with
            util.get_short_name before querying.
          params: not used in the visible part of this helper.
          shard_count: not used in the visible part of this helper.

        Returns:
          An empty list when the kind has no entities.
        """

        raw_entity_kind = util.get_short_name(entity_kind_name)

        # we use datastore.Query instead of ext.db.Query here, because we can't
        # erase ordering on db.Query once we set it.
        ds_query = datastore.Query(kind=raw_entity_kind,
                                   _app=app,
                                   keys_only=True)
        ds_query.Order("__key__")
        first_entity_key_list = ds_query.Get(1)
        if not first_entity_key_list:
            return []
        first_entity_key = first_entity_key_list[0]
        ds_query.Order(("__key__", datastore.Query.DESCENDING))
        try:
            # Single-element unpacking: expects exactly one key back.
            last_entity_key, = ds_query.Get(1)
        except db.NeedIndexError as e:
            # TODO(user): Show this error in the worker log, not the app logs.
            logging.warning(
                "Cannot create accurate approximation of keyspace, "
                "guessing instead. Please address this problem: %s", e)
            # TODO(user): Use a key-end hint from the user input parameters
            # in this case, in the event the user has a good way of figuring out
            # the range of the keyspace.
            last_entity_key = key_range.KeyRange.guess_end_key(
                raw_entity_kind, first_entity_key)
Esempio n. 7
0
  def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
                                  shard_count):
    """Return KeyRange objects. Helper for _split_input_from_params.

    Fetches keys ordered by the "__scatter__" property, sorts them and uses
    (a sample of) them as split points for ascending key ranges.

    Args:
      cls: class providing _OVERSAMPLING_FACTOR.
      app: value forwarded as _app to datastore.Query and KeyRange.
      namespace: namespace to query and to build the ranges for.
      entity_kind_name: entity kind name; shortened with
        util.get_short_name before querying.
      shard_count: requested number of shards.

    Returns:
      A list of key_range.KeyRange. A single unbounded range when
      shard_count is 1 or when the scatter query returns no keys; otherwise
      (number of split points + 1) adjacent ascending ranges.
    """

    raw_entity_kind = util.get_short_name(entity_kind_name)

    if shard_count == 1:
      # With one shard we don't need to calculate any splitpoints at all.
      return [key_range.KeyRange(namespace=namespace, _app=app)]

    # we use datastore.Query instead of ext.db.Query here, because we can't
    # erase ordering on db.Query once we set it.
    ds_query = datastore.Query(kind=raw_entity_kind,
                               namespace=namespace,
                               _app=app,
                               keys_only=True)
    ds_query.Order("__scatter__")
    random_keys = ds_query.Get(shard_count * cls._OVERSAMPLING_FACTOR)
    if not random_keys:
      # This might mean that there are no entities with scatter property
      # or there are no entities at all.
      return [key_range.KeyRange(namespace=namespace, _app=app)]
    random_keys.sort()
    # pick shard_count - 1 points to generate shard_count splits
    split_points_count = shard_count - 1
    if len(random_keys) > split_points_count:
      # downsample; floor division keeps the index an integer regardless of
      # whether true division is in effect.
      random_keys = [random_keys[len(random_keys) * i // split_points_count]
                     for i in range(split_points_count)]

    key_ranges = []

    # Everything before the first split point. _app is passed on every range
    # so the split ranges target the same app as the query above.
    key_ranges.append(key_range.KeyRange(
        key_start=None,
        key_end=random_keys[0],
        direction=key_range.KeyRange.ASC,
        include_start=False,
        include_end=False,
        namespace=namespace,
        _app=app))

    # Half-open ranges [split_i, split_i+1) between consecutive points.
    for i in range(0, len(random_keys) - 1):
      key_ranges.append(key_range.KeyRange(
          key_start=random_keys[i],
          key_end=random_keys[i+1],
          direction=key_range.KeyRange.ASC,
          include_start=True,
          include_end=False,
          namespace=namespace,
          _app=app))

    # Everything from the last split point onwards.
    key_ranges.append(key_range.KeyRange(
        key_start=random_keys[-1],
        key_end=None,
        direction=key_range.KeyRange.ASC,
        include_start=True,
        include_end=False,
        namespace=namespace,
        _app=app))

    return key_ranges
 def _get_raw_entity_kind(cls, model_classpath):
     """Return the entity kind to use for the model at model_classpath.

     Args:
       cls: the class this helper is invoked on (unused).
       model_classpath: dotted path resolved with util.for_name.

     Returns:
       entity_type.kind() for db models, entity_type._get_kind() for ndb
       models, otherwise the short name of model_classpath.
     """
     entity_type = util.for_name(model_classpath)
     # for_name may resolve to a model *class* rather than an instance, in
     # which case isinstance(..., db.Model) alone is never true; accept
     # db.Model subclasses too so an overridden kind() is honoured.
     is_db_model = isinstance(entity_type, db.Model) or (
         isinstance(entity_type, type) and issubclass(entity_type, db.Model))
     if is_db_model:
         return entity_type.kind()
     elif isinstance(entity_type, (ndb.Model, ndb.MetaModel)):
         # pylint: disable=protected-access
         return entity_type._get_kind()
     else:
         return util.get_short_name(model_classpath)
 def _get_raw_entity_kind(cls, model_classpath):
   """Return the entity kind to use for the model at model_classpath.

   Args:
     cls: the class this helper is invoked on (unused).
     model_classpath: dotted path resolved with util.for_name.

   Returns:
     entity_type.kind() for db models, entity_type._get_kind() for ndb
     models, otherwise the short name of model_classpath.
   """
   entity_type = util.for_name(model_classpath)
   # for_name may resolve to a model *class* rather than an instance, in
   # which case isinstance(..., db.Model) alone is never true; accept
   # db.Model subclasses too so an overridden kind() is honoured.
   is_db_model = isinstance(entity_type, db.Model) or (
       isinstance(entity_type, type) and issubclass(entity_type, db.Model))
   if is_db_model:
     return entity_type.kind()
   elif isinstance(entity_type, (ndb.Model, ndb.MetaModel)):
     # pylint: disable=protected-access
     return entity_type._get_kind()
   else:
     return util.get_short_name(model_classpath)
  def _create_iter(self, iter_cls, entity_kind):
    """Build an iter_cls instance over the key names "0" through "999".

    Args:
      iter_cls: callable invoked as iter_cls(key_range, query_spec).
      entity_kind: entity kind / model class path; its short name is used
        for the keys and the query spec.

    Returns:
      Whatever iter_cls returns for the inclusive key range and query spec.
    """
    short_kind = util.get_short_name(entity_kind)
    ns = self.namespace

    first_key = db.Key.from_path(short_kind, "0", namespace=ns)
    last_key = db.Key.from_path(short_kind, "999", namespace=ns)
    # Both end points are part of the range.
    inclusive_range = key_range.KeyRange(first_key,
                                         last_key,
                                         include_start=True,
                                         include_end=True,
                                         namespace=ns)

    spec = model.QuerySpec(entity_kind=short_kind,
                           batch_size=10,
                           filters=self.filters,
                           model_class_path=entity_kind)
    return iter_cls(inclusive_range, spec)
 def _create_iter(self, iter_cls, entity_kind):
   """Build a key-ranges iterator with one KeyRange per namespace.

   Args:
     iter_cls: iterator class forwarded to create_key_ranges_iterator.
     entity_kind: entity kind / model class path for the query spec.

   Returns:
     The iterator created by
     db_iters.RangeIteratorFactory.create_key_ranges_iterator.
   """
   per_namespace = []
   for ns in self.namespaces:
     per_namespace.append(key_range.KeyRange(namespace=ns))
   combined = key_ranges.KeyRangesFactory.create_from_list(per_namespace)

   spec = model.QuerySpec(
       entity_kind=util.get_short_name(entity_kind),
       batch_size=10,
       filters=self.filters,
       model_class_path=entity_kind)
   return db_iters.RangeIteratorFactory.create_key_ranges_iterator(
       combined, spec, iter_cls)
    def _create_iter(self, iter_cls, entity_kind):
        """Build an iter_cls instance over the key names "0" through "999".

        Args:
          iter_cls: callable invoked as iter_cls(key_range, query_spec).
          entity_kind: entity kind / model class path; its short name is
            used for the keys and the query spec.

        Returns:
          Whatever iter_cls returns for the inclusive key range and query
          spec.
        """
        short_kind = util.get_short_name(entity_kind)
        ns = self.namespace

        first_key = db.Key.from_path(short_kind, "0", namespace=ns)
        last_key = db.Key.from_path(short_kind, "999", namespace=ns)
        # Both end points are part of the range.
        inclusive_range = key_range.KeyRange(first_key,
                                             last_key,
                                             include_start=True,
                                             include_end=True,
                                             namespace=ns)

        spec = model.QuerySpec(entity_kind=short_kind,
                               batch_size=10,
                               filters=self.filters,
                               model_class_path=entity_kind)
        return iter_cls(inclusive_range, spec)
 def _create_iter(self, iter_cls, entity_kind):
     """Build a key-ranges iterator with one KeyRange per namespace.

     Args:
       iter_cls: iterator class forwarded to create_key_ranges_iterator.
       entity_kind: entity kind / model class path for the query spec.

     Returns:
       The iterator created by
       db_iters.RangeIteratorFactory.create_key_ranges_iterator.
     """
     per_namespace = []
     for ns in self.namespaces:
         per_namespace.append(key_range.KeyRange(namespace=ns))
     combined = key_ranges.KeyRangesFactory.create_from_list(per_namespace)

     spec = model.QuerySpec(entity_kind=util.get_short_name(entity_kind),
                            batch_size=10,
                            filters=self.filters,
                            model_class_path=entity_kind)
     return db_iters.RangeIteratorFactory.create_key_ranges_iterator(
         combined, spec, iter_cls)
 def _create_iter(self, entity_kind):
   """Build a property-range iterator for entity_kind.

   The property range is built from self.filters and the namespace range
   spans from the first to the last entry of self.namespaces.

   Args:
     entity_kind: entity kind / model class path.

   Returns:
     The iterator created by
     db_iters.RangeIteratorFactory.create_property_range_iterator.
   """
   spec = model.QuerySpec(
       entity_kind=util.get_short_name(entity_kind),
       batch_size=10,
       filters=self.filters,
       model_class_path=entity_kind)
   prop_range = property_range.PropertyRange(self.filters, entity_kind)
   first_ns, last_ns = self.namespaces[0], self.namespaces[-1]
   namespaces = namespace_range.NamespaceRange(first_ns, last_ns)
   return db_iters.RangeIteratorFactory.create_property_range_iterator(
       prop_range, namespaces, spec)
 def _create_iter(self, entity_kind):
     """Build a property-range iterator for entity_kind.

     The property range is built from self.filters and the namespace range
     spans from the first to the last entry of self.namespaces.

     Args:
       entity_kind: entity kind / model class path.

     Returns:
       The iterator created by
       db_iters.RangeIteratorFactory.create_property_range_iterator.
     """
     spec = model.QuerySpec(entity_kind=util.get_short_name(entity_kind),
                            batch_size=10,
                            filters=self.filters,
                            model_class_path=entity_kind)
     prop_range = property_range.PropertyRange(self.filters, entity_kind)
     first_ns, last_ns = self.namespaces[0], self.namespaces[-1]
     namespaces = namespace_range.NamespaceRange(first_ns, last_ns)
     return db_iters.RangeIteratorFactory.create_property_range_iterator(
         prop_range, namespaces, spec)
Esempio n. 16
0
  def __iter__(self):
    """Yield the keys in the range, one batch at a time.

    Each batch is a keys-only ascending query limited to self._batch_size.
    The key range is advanced past every key right before it is yielded, so
    the range always describes the keys that were not consumed yet.

    Yields:
      the next key returned by the datastore query.
    """
    while True:
      kind = util.get_short_name(self._entity_kind)
      keys = self._key_range.make_ascending_datastore_query(
          kind, keys_only=True).Get(limit=self._batch_size)

      if not keys:
        # An empty batch means the range is exhausted.
        return

      for key in keys:
        self._key_range.advance(key)
        yield key
Esempio n. 17
0
    def __iter__(self):
        """Yield the keys in the range, one batch at a time.

        Each batch is a keys-only ascending query limited to
        self._batch_size. The key range is advanced past every key right
        before it is yielded, so the range always describes the keys that
        were not consumed yet.

        Yields:
          the next key returned by the datastore query.
        """
        while True:
            kind = util.get_short_name(self._entity_kind)
            keys = self._key_range.make_ascending_datastore_query(
                kind, keys_only=True).Get(limit=self._batch_size)

            if not keys:
                # An empty batch means the range is exhausted.
                return

            for key in keys:
                self._key_range.advance(key)
                yield key
 def testGetShortName(self):
   """get_short_name keeps only the part after the last dot."""
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual("blah", util.get_short_name("blah"))
   self.assertEqual("blah", util.get_short_name(".blah"))
   self.assertEqual("blah", util.get_short_name("__mmm__.blah"))
   self.assertEqual("blah", util.get_short_name("__mmm__.Krb.blah"))