Example 1
 def get_table_list(self, cursor):
     namespace = self.connection.settings_dict.get("NAMESPACE")
     kinds = [kind.key().id_or_name() for kind in datastore.Query('__kind__', namespace=namespace).Run()]
     return [TableInfo(x, "t") for x in kinds]
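Here `__kind__` is the built-in Datastore metadata kind, so the query enumerates every kind in the namespace without touching user data. The same listing is available through the ndb metadata API; a minimal sketch, assuming the default namespace:

    # Sketch: equivalent kind listing via the ndb metadata API.
    from google.appengine.ext.ndb import metadata

    kinds = metadata.get_kinds()  # plain list of kind-name strings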
Example 2
    def _split_ns_by_scatter(cls, shard_count, namespace, raw_entity_kind,
                             app):
        """Split a namespace by scatter index into key_range.KeyRange.

    TODO(user): Power this with key_range.KeyRange.compute_split_points.

    Args:
      shard_count: number of shards.
      namespace: namespace name to split. str.
      raw_entity_kind: low level datastore API entity kind.
      app: app id in str.

    Returns:
      A list of key_range.KeyRange objects. If there are not enough entities to
      split into the requested number of shards, the returned list will contain
      KeyRanges ordered lexicographically with any Nones appearing at the end.
    """
        if shard_count == 1:
            # With one shard we don't need to calculate any split points at all.
            return [key_range.KeyRange(namespace=namespace, _app=app)]

        ds_query = datastore.Query(kind=raw_entity_kind,
                                   namespace=namespace,
                                   _app=app,
                                   keys_only=True)
        ds_query.Order("__scatter__")
        oversampling_factor = 32
        random_keys = ds_query.Get(shard_count * oversampling_factor)

        if not random_keys:
            # There are no entities with scatter property. We have no idea
            # how to split.
            return ([key_range.KeyRange(namespace=namespace, _app=app)] +
                    [None] * (shard_count - 1))

        random_keys.sort()

        if len(random_keys) >= shard_count:
            # We've got a lot of scatter values. Sample them down.
            random_keys = cls._choose_split_points(random_keys, shard_count)

        k_ranges = []

        k_ranges.append(
            key_range.KeyRange(key_start=None,
                               key_end=random_keys[0],
                               direction=key_range.KeyRange.ASC,
                               include_start=False,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

        for i in range(0, len(random_keys) - 1):
            k_ranges.append(
                key_range.KeyRange(key_start=random_keys[i],
                                   key_end=random_keys[i + 1],
                                   direction=key_range.KeyRange.ASC,
                                   include_start=True,
                                   include_end=False,
                                   namespace=namespace,
                                   _app=app))

        k_ranges.append(
            key_range.KeyRange(key_start=random_keys[-1],
                               key_end=None,
                               direction=key_range.KeyRange.ASC,
                               include_start=True,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

        if len(k_ranges) < shard_count:
            # We need to have as many shards as it was requested. Add some Nones.
            k_ranges += [None] * (shard_count - len(k_ranges))
        return k_ranges
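`cls._choose_split_points` is referenced above but not shown. A plausible sketch, assuming it simply subsamples the sorted scatter keys down to shard_count - 1 evenly spaced split points (the upstream implementation may differ):

    @classmethod
    def _choose_split_points(cls, sorted_keys, shard_count):
        """Sketch: return evenly spaced split points from a sorted key sample."""
        assert len(sorted_keys) >= shard_count
        index_stride = len(sorted_keys) / float(shard_count)
        return [sorted_keys[int(round(index_stride * i))]
                for i in range(1, shard_count)]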
Example 3
    def guess_end_key(kind, key_start, probe_count=30, split_rate=5):
        """Guess the end of a key range with a binary search of probe queries.

    When the 'key_start' parameter has a key hierarchy, this function will
    only determine the key range for keys in a similar hierarchy. That means
    if the keys are in the form:

      kind=Foo, name=bar/kind=Stuff, name=meep

    only this range will be probed:

      kind=Foo, name=*/kind=Stuff, name=*

    That means other entities of kind 'Stuff' that are children of another
    parent entity kind will be skipped:

      kind=Other, name=cookie/kind=Stuff, name=meep

    Args:
      kind: String name of the entity kind.
      key_start: The starting key of the search range. In most cases this
        should be id = 0 or name = '\0'.  May be db.Key or ndb.Key.
      probe_count: Optional, how many probe queries to run.
      split_rate: Exponential rate to use for splitting the range on the
        way down from the full key space. For smaller ranges this should
        be higher so more of the keyspace is skipped on initial descent.

    Returns:
      A db.Key that is guaranteed to be as high as or higher than the
      highest key existing for this Kind. A query between 'key_start' and
      this returned Key (inclusive) will cover all entities of this Kind.

    NOTE: Even though an ndb.Key instance is accepted as argument,
    the return value is always a db.Key instance.
    """
        if ndb is not None:
            # Accept an ndb.Key and convert it to the equivalent db.Key.
            if isinstance(key_start, ndb.Key):
                key_start = key_start.to_old_key()
        app = key_start.app()
        namespace = key_start.namespace()

        full_path = key_start.to_path()
        for index, piece in enumerate(full_path):
            if index % 2 == 0:
                # Even path positions are kind names; leave them untouched.
                continue
            elif isinstance(piece, str):
                # Replace the name with a string that sorts after any
                # practical key name.
                full_path[index] = u"\xffff"
            else:
                # Replace the id with the maximum 64-bit integer.
                full_path[index] = 2**63 - 1

        key_end = db.Key.from_path(*full_path, **{
            "_app": app,
            "namespace": namespace
        })
        split_key = key_end

        for i in range(probe_count):
            for j in range(split_rate):
                split_key = KeyRange.split_keys(key_start, split_key, 1)
            results = datastore.Query(kind, {
                "__key__ >": split_key
            },
                                      namespace=namespace,
                                      _app=app,
                                      keys_only=True).Get(1)
            if results:
                if results[0].name() and not key_start.name():
                    # Probing from an id-based start key found a named key;
                    # restart the search from that key.
                    return KeyRange.guess_end_key(kind, results[0],
                                                  probe_count - 1, split_rate)
                else:
                    split_rate = 1
                    key_start = results[0]
                    split_key = key_end
            else:
                key_end = split_key

        return key_end
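A hedged usage sketch: pairing guess_end_key with a low start key to build a KeyRange that covers every entity of a kind (the kind 'Foo' and the id are invented):

    # Sketch: bound the full keyspace of kind 'Foo' for range scanning.
    from google.appengine.ext import db
    from google.appengine.ext.key_range import KeyRange

    key_start = db.Key.from_path('Foo', 1)  # lowest assignable numeric id
    key_end = KeyRange.guess_end_key('Foo', key_start)
    full_range = KeyRange(key_start=key_start, key_end=key_end,
                          include_start=True, include_end=True)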
Example 4
def replicate(table, kind, cursor, stats, **kwargs):
    """Drive replication to Google CloudSQL."""

    start = time.time()

    logging.debug(u'cursor: %s (%r)', type(cursor), cursor)
    if cursor:
        if isinstance(cursor, basestring):
            cursor = datastore_query.Cursor.from_websafe_string(cursor)
        query = datastore.Query(kind=kind, cursor=cursor)
    else:
        query = datastore.Query(kind=kind)

    if 'filters' in kwargs:
        for property_operator, value in kwargs['filters']:
            query[property_operator] = value

    batch_size = stats.get('batch_size', 1)
    query_iterator = query.Run(limit=batch_size, offset=0)

    entitydicts = entity_list_generator(query_iterator, table)
    entities = table.normalize_entities(entitydicts)

    if not entities:
        stats['time'] += time.time() - start
        return None

    # MAXSIZE is chosen very conservatively.
    # Even if a batch is larger, it's very likely not too large
    # for a single write call.
    try:
        with DatabaseCursor() as dbcursor:
            dbcursor.executemany(table.get_replace_statement(), entities)
            stats['records'] += len(entities)
    except (rdbms.InternalError, rdbms.IntegrityError) as msg:
        logging.warning(u'Caught RDBMS exception: %s', msg)
        raise
    except apiproxy_errors.RequestTooLargeError:
        logging.warn(u'Request too big: %s', get_listsize(entities))
        stats['batch_size'] /= 10
        return cursor
    except TypeError as exception:
        if 'not enough arguments' in str(exception):
            logging.debug(u'statement: %r',
                          table.get_replace_statement(),
                          exc_info=True)
            logging.debug(u'%d: %r', len(table.fields), table.fields)
            for entity in entities:
                if len(entity) != len(table.fields):
                    logging.debug(u'(%d): %s', len(entity), entity)
                    break
        raise
    except:
        logging.debug(u'statement: %r',
                      table.get_replace_statement(),
                      exc_info=True)
        raise

    # Adapt batch size. This could be further optimized in the future,
    # like adapting it to a ratio of size and MAXSIZE.
    size = get_listsize(entities)
    if size * 2 < replication_config.MAXSIZE:
        stats['batch_size'] = int(
            min([replication_config.MAXRECORDS, batch_size * 2]))
        logging.info(u'increasing batch_size to %d', stats['batch_size'])
    elif size > replication_config.MAXSIZE:
        stats['batch_size'] = int(
            min([replication_config.MAXRECORDS, batch_size * 0.8]))
        logging.info(u'decreasing batch_size to %d', stats['batch_size'])

    stats['time'] += time.time() - start
    return query.GetCursor()
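A sketch of a driver loop for replicate, persisting the cursor between batches as a websafe string (the table, stats, and kind are assumed; replicate accepts either form, per the basestring check above):

    # Sketch: replicate a kind batch by batch until the query runs dry.
    stats = {'records': 0, 'time': 0.0, 'batch_size': 50}
    cursor = None
    while True:
        cursor = replicate(table, 'Customer', cursor, stats)
        if cursor is None:
            break  # no more entities to copy
        cursor = cursor.to_websafe_string()  # safe to stash between requests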
Example 5
  def Bind(self, args, keyword_args, cursor=None, end_cursor=None):
    """Bind the existing query to the argument list.

    Assumes that the input args are first positional, then a dictionary.
    So, if the query contains references to :1, :2 and :name, it is assumed
    that arguments are passed as (:1, :2, dict) where dict contains a mapping
    [name] -> value.

    Args:
      args: the arguments to bind to the object's unbound references.
      keyword_args: dictionary-based arguments (for named parameters).

    Raises:
      datastore_errors.BadArgumentError: when arguments are left unbound
        (missing from the inputs arguments) or when arguments do not match the
        expected type.

    Returns:
      The bound datastore.Query object. This may take the form of a MultiQuery
      object if the GQL query requires multiple backend queries to satisfy it.
    """
    num_args = len(args)
    input_args = frozenset(xrange(num_args))
    used_args = set()

    queries = []
    enumerated_queries = self.EnumerateQueries(used_args, args, keyword_args)
    if enumerated_queries:
      query_count = len(enumerated_queries)
    else:
      query_count = 1

    for i in xrange(query_count):
      queries.append(datastore.Query(self._entity,
                                     _app=self.__app,
                                     keys_only=self._keys_only,
                                     namespace=self.__namespace,
                                     cursor=cursor,
                                     end_cursor=end_cursor))

    logging.log(LOG_LEVEL,
                'Binding with %i positional args %s and %i keywords %s',
                len(args), args, len(keyword_args), keyword_args)

    for ((identifier, condition), value_list) in self.__filters.iteritems():
      for (operator, params) in value_list:
        value = self.__Operate(args, keyword_args, used_args, operator, params)
        if not self.__IsMultiQuery(condition):
          for query in queries:
            self.__AddFilterToQuery(identifier, condition, value, query)

    unused_args = input_args - used_args
    if unused_args:
      unused_values = [unused_arg + 1 for unused_arg in unused_args]
      raise datastore_errors.BadArgumentError('Unused positional arguments %s' %
                                              unused_values)

    if enumerated_queries:
      logging.log(LOG_LEVEL,
                  'Multiple Queries Bound: %s',
                  enumerated_queries)

      for (query, enumerated_query) in zip(queries, enumerated_queries):
        query.update(enumerated_query)

    if self.__orderings:
      for query in queries:
        query.Order(*tuple(self.__orderings))

    if query_count > 1:
      return MultiQuery(queries, self.__orderings)
    else:
      return queries[0]
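For context, a hedged sketch of how a Bind like this is typically exercised through the SDK's gql.GQL class (the kind and property names are invented):

    # Sketch: bind positional and named GQL parameters, then run.
    from google.appengine.ext import gql

    query = gql.GQL("SELECT * FROM Person WHERE age >= :1 AND city = :city")
    bound = query.Bind([21], {'city': 'Springfield'})
    for entity in bound.Run():
        pass  # each result is a datastore.Entity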
Example 6
    def Bind(self, args, keyword_args):
        """Bind the existing query to the argument list.

    Assumes that the input args are first positional, then a dictionary.
    So, if the query contains references to :1, :2 and :name, it is assumed
    that arguments are passed as (:1, :2, dict) where dict contains a mapping
    [name] -> value.

    Args:
      args: the arguments to bind to the object's unbound references.
      keyword_args: dictionary-based arguments (for named parameters).

    Raises:
      datastore_errors.BadArgumentError: when arguments are left unbound
        (missing from the inputs arguments).

    Returns:
      The bound datastore.Query object.
    """
        num_args = len(args)
        input_args = frozenset(xrange(num_args))
        used_args = set()

        query = datastore.Query(self._entity, _app=self.__app)

        logging.log(LOG_LEVEL, 'Copying %i pre-bound filters',
                    len(self.__bound_filters))
        for (condition, value) in self.__bound_filters.iteritems():
            logging.log(LOG_LEVEL, 'Pre-bound filter: %s %s', condition, value)
            query[condition] = value

        logging.log(LOG_LEVEL, 'Binding with %i args %s', len(args), args)
        for (param, filters) in self.__filters.iteritems():
            for (identifier, condition) in filters:
                if isinstance(param, int):
                    if param <= num_args:
                        self.__AddFilter(identifier, condition,
                                         args[param - 1], query)
                        used_args.add(param - 1)
                        logging.log(LOG_LEVEL, 'binding: %i %s', param,
                                    args[param - 1])
                    else:
                        raise datastore_errors.BadArgumentError(
                            'Missing argument for bind, requires argument #%i, '
                            'but only has %i args.' % (param, num_args))
                elif isinstance(param, str):
                    if param in keyword_args:
                        self.__AddFilter(identifier, condition,
                                         keyword_args[param], query)
                        logging.log(LOG_LEVEL, 'binding: %s %s', param,
                                    keyword_args)
                    else:
                        raise datastore_errors.BadArgumentError(
                            'Missing named arguments for bind, requires argument %s'
                            % param)
                else:
                    assert False, 'Unknown parameter %s' % param

        if self.__orderings:
            query.Order(*tuple(self.__orderings))

        unused_args = input_args - used_args
        if unused_args:
            unused_values = [unused_arg + 1 for unused_arg in unused_args]
            raise datastore_errors.BadArgumentError(
                'Unused positional arguments %s' % unused_values)

        return query
Example 7
	def filter(self, filter, value=None ):
		"""
			Adds a filter to this query. #fixme: Better description required here...

			The following examples are equivalent: ``filter( "name", "John" )``
			and ``filter( {"name": "John"} )``.

			See also :func:`server.db.Query.mergeExternalFilter` for a safer filter implementation.

			:param filter: A dictionary to read the filters from, or a string (name of that filter)
			:type filter: dict | str

			:param value: The value of that filter. Only valid, if *key* is a string.
			:type: value: int | long | float | bytes | string | list | datetime

			:returns: Returns the query itself for chaining.
			:rtype: server.db.Query
		"""
		if self.datastoreQuery is None:
			#This query is already unsatisfiable and adding more constraints won't change that
			return( self )
		if isinstance( filter, dict ):
			for k, v in filter.items():
				self.filter( k, v )
			return( self )
		if self._filterHook is not None:
			try:
				r = self._filterHook( self, filter, value )
			except RuntimeError:
				self.datastoreQuery = None
				return( self )
			if r is None:
				# The Hook did something special directly on 'self' to apply that filter,
				# no need for us to do anything
				return( self )
			filter, value = r
		if value!=None and (filter.endswith(" !=") or filter.lower().endswith(" in")):
			if isinstance( self.datastoreQuery, datastore.MultiQuery ):
				raise NotImplementedError("You cannot use multiple IN or != filter")
			origQuery = self.datastoreQuery
			queries = []
			if filter.endswith("!="):
				q = datastore.Query( kind=self.getKind() )
				q[ "%s <" % filter.split(" ")[0] ] = value
				queries.append( q )
				q = datastore.Query( kind=self.getKind() )
				q[ "%s >" % filter.split(" ")[0] ] = value
				queries.append( q )
			else: #IN filter
				if not (isinstance( value, list ) or isinstance( value, tuple ) ):
					raise NotImplementedError("Value must be list or tuple if using IN filter!")
				for val in value:
					q = datastore.Query( kind=self.getKind() )
					q[ "%s =" % filter.split(" ")[0] ] = val
					q.Order( *origQuery.__orderings )
					queries.append( q )
			self.datastoreQuery = MultiQuery( queries, origQuery.__orderings )
			for k,v in origQuery.items():
				self.datastoreQuery[ k ] = v
		elif filter and value!=None:
			self.datastoreQuery[ filter ] = value
		else:
			raise NotImplementedError("Incorrect call to query.filter()!")
		return( self )
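The != branch above rewrites a single filter into two single-inequality queries merged client-side; the IN branch does the same with one query per value. The != expansion in isolation, as a sketch with invented names:

    # Sketch: "age != 5" has no native operator, so it becomes < plus >.
    q1 = datastore.Query(kind='Person')
    q1['age <'] = 5
    q2 = datastore.Query(kind='Person')
    q2['age >'] = 5
    merged = datastore.MultiQuery([q1, q2], ())  # empty orderings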
Example 8
 def _get_testmodel_query(self, db="default"):
     namespace = settings.DATABASES[db].get('NAMESPACE', '')
     return datastore.Query(
         TestModel._meta.db_table,
         namespace=namespace
     )
Example 9
 def respond(self):
     from google.appengine.ext.ndb import metadata
     from google.appengine.api import search, datastore
     from google.appengine.ext import blobstore
     # @todo THIS DELETES EVERYTHING FROM DATASTORE AND BLOBSTORE, AND CURRENTLY EXISTS ONLY FOR TESTING PURPOSES!
     models = iom.Engine.get_schema()
     kinds = [str(i) for i in xrange(200)]
     namespaces = metadata.get_namespaces()
     indexes = []
     if not self.request.get('do_not_delete_datastore'):
         if self.request.get('kinds'):
             kinds = self.request.get('kinds').split(',')
         ignore = []
         if self.request.get('ignore'):
             ignore = self.request.get('ignore')
         tools.log.debug('Delete kinds %s' % kinds)
         for kind in kinds:
             for namespace in namespaces:
                 if kind in ignore:
                     continue
                 p = tools.Profile()
                 gets = datastore.Query(kind,
                                        namespace=namespace,
                                        keys_only=True).Run()
                 keys = list(gets)
                 total_keys = len(keys)
                 if total_keys:
                     tools.log.debug(
                         'Delete kind %s. Found %s keys. Took %sms to get.'
                         % (kind, total_keys, p.miliseconds))
                     p = tools.Profile()
                     datastore.Delete(keys)
                     tools.log.debug(
                         'Deleted all records for kind %s. Took %sms.' %
                         (kind, p.miliseconds))
     indexes.extend(
         (search.Index(name='catalogs'), search.Index(name='24')))
     # empty catalog index!
     if not self.request.get('do_not_delete_indexes'):
         docs = 0
         for index in indexes:
             while True:
                 document_ids = [
                     document.doc_id
                     for document in index.get_range(ids_only=True)
                 ]
                 if not document_ids:
                     break
                 try:
                     index.delete(document_ids)
                     docs += len(document_ids)
                 except:
                     pass
         tools.log.debug('Deleted %s indexes with a total of %s documents.' %
                         (len(indexes), docs))
     # delete all blobs
     if not self.request.get('do_not_delete_blobs'):
         keys = blobstore.BlobInfo.all().fetch(None, keys_only=True)
         blobstore.delete(keys)
         tools.log.debug('Deleted %s blobs.' % len(keys))
     if not self.request.get('do_not_delete_memcache'):
         tools.mem_flush_all()
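One caveat for the bulk delete above: a single datastore.Delete RPC is capped at 500 keys on classic App Engine, so large kinds need chunking. A sketch:

    # Sketch: delete keys in RPC-sized chunks (500 is the classic cap).
    BATCH = 500
    for i in xrange(0, len(keys), BATCH):
        datastore.Delete(keys[i:i + BATCH])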
Example 10
	def mergeExternalFilter(self, filters ):
		"""
			Safely merges filters according to the data model.

			It's only valid to call this function if the query has been created using
			:func:`server.skeleton.Skeleton.all`.

			It's safe to pass filters received from an external source (a user);
			unknown/invalid filters will be ignored, so the query-object is kept in a
			valid state even when processing malformed data.
			
			If complex queries are needed (e.g. filter by relations), this function
			shall also be used.

			See also :func:`server.db.Query.filter` for simple filters.

			:param filters: A dictionary of attributes and filter pairs.
			:type filters: dict

			:returns: Returns the query itself for chaining.
			:rtype: server.db.Query
		"""
		from server.bones import baseBone, relationalBone
		if "id" in filters.keys():
			self.datastoreQuery = None
			logging.error("Filtering by id is no longer supported. Use key instead.")
			return self
		if self.srcSkel is None:
			raise NotImplementedError("This query has not been created using skel.all()")
		if self.datastoreQuery is None: #This query is already unsatisfiable and adding more constraints won't change this
			return( self )
		skel = self.srcSkel
		if skel.searchIndex and "search" in filters.keys(): #We perform a Search via Google API - all other parameters are ignored
			try:
				searchRes = search.Index( name=skel.searchIndex ).search( query=search.Query( query_string=filters["search"], options=search.QueryOptions( limit=25 ) ) )
			except search.QueryError: #We can't parse the query; treat it as verbatim
				qstr="\"%s\"" % filters["search"].replace("\"","")
				searchRes = search.Index( name=skel.searchIndex ).search( query=search.Query( query_string=qstr, options=search.QueryOptions( limit=25 ) ) )
			tmpRes = [ datastore_types.Key( encoded=x.doc_id[ 2: ] ) for x in searchRes ]
			if tmpRes:
				filters = []
				for x in tmpRes:
					filters.append( datastore.Query( self.getKind(), { "%s =" % datastore_types.KEY_SPECIAL_PROPERTY: x } ) )
				self.datastoreQuery = datastore.MultiQuery( filters, () )
			else:
				self.datastoreQuery = None
			return( self )
		#bones = [ (getattr( skel, key ), key) for key in dir( skel ) if not "__" in key and isinstance( getattr( skel, key ) , baseBone ) ]
		bones = [ (y,x) for x,y in skel.items() ]
		try:
			#First, filter non-relational bones
			for bone, key in [ x for x in bones if not isinstance( x[0], relationalBone ) ]:
				bone.buildDBFilter( key, skel, self, filters )
			#Second, process orderings of non-relational bones
			for bone, key in [ x for x in bones if not isinstance( x[0], relationalBone ) ]:
				bone.buildDBSort( key, skel, self, filters )
			#Now filter relational bones
			for bone, key in [ x for x in bones if isinstance( x[0], relationalBone ) ]:
				bone.buildDBFilter( key, skel, self, filters )
			#finally process orderings of relational bones
			for bone, key in [ x for x in bones if isinstance( x[0], relationalBone ) ]:
				bone.buildDBSort( key, skel, self, filters )
		except RuntimeError as e:
			logging.exception(e)
			self.datastoreQuery = None
			return( self )
		if "search" in filters.keys():
			if isinstance( filters["search"], list ):
				taglist = [ "".join([y for y in unicode(x).lower() if y in conf["viur.searchValidChars"] ] ) for x in filters["search"] ]
			else:
				taglist = [ "".join([y for y in unicode(x).lower() if y in conf["viur.searchValidChars"] ]) for x in unicode(filters["search"]).split(" ")] 
			assert not isinstance( self.datastoreQuery, datastore.MultiQuery ), "Searching using viur-tags is not possible on a query that already uses an IN-filter!"
			origFilter = self.datastoreQuery
			queries = []
			for tag in taglist[:30]: #Limit to max 30 keywords
				q = datastore.Query( kind=origFilter.__kind )
				q[ "viur_tags" ] = tag
				queries.append( q )
			self.datastoreQuery = datastore.MultiQuery( queries, origFilter.__orderings )
			for k, v in origFilter.items():
				self.datastoreQuery[ k ] = v
		if "cursor" in filters.keys() and filters["cursor"] and filters["cursor"].lower()!="none":
			self.cursor( filters["cursor"] )
		if "amount" in list(filters.keys()) and str(filters["amount"]).isdigit() and int( filters["amount"] ) >0 and int( filters["amount"] ) <= 100:
			self.limit( int(filters["amount"]) )
		if "postProcessSearchFilter" in dir( skel ):
			skel.postProcessSearchFilter( self, filters )
		return( self )
Example 11
def get_namespace_keys(app, limit):
    """Get namespace keys."""
    ns_query = datastore.Query('__namespace__', keys_only=True, _app=app)
    return list(ns_query.Run(limit=limit, batch_size=limit))
Example 12
class CommentHelper(object):
    """ Helper de la clase comentario """
    def get_by_id(self, id):
        try:
            id = long(id)
        except:
            return None
        comment = Comment.get_by_id(id)
        if comment is not None:
            if comment.deleted:
                return None
            if comment._is_public():
                return comment
        return None

    def get_by_key(self, key):
        """
        Gets the event with the given key
        """
        return Comment.get(key)

    def get_by_user(self, user, query_id=None, page=1, querier=None):
        """
        Gets a list of all comments made by a user

            :param user: user to look up
            :type user: :class:`geouser.models.User`
            :param query_id: identifier of the paged query
            :type query_id: :class:`long`
            :param page: page to fetch
            :type page: :class:`integer`
            
            :returns: [query_id, [:class:`geovote.models.Comment`]]
        """
        if querier is not None and not isinstance(querier, User):
            raise TypeError
        from georemindme.paging import PagedQuery
        from google.appengine.api import datastore
        q = datastore.Query('Comment', {
            'user =': user.key(),
            'deleted =': False
        })
        q.Order(('created', datastore.Query.DESCENDING))
        p = PagedQuery(q, id=query_id, page_size=7)
        comments = p.fetch_page(page)
        from georemindme.funcs import prefetch_refpropsEntity
        prefetch = prefetch_refpropsEntity(comments, 'user', 'instance')
        return [
            p.id,
            [{
                'id':
                comment.key().id(),
                'username':
                prefetch[comment['user']].username,
                'has_voted':
                Vote.objects.user_has_voted(querier, comment.key())
                if querier is not None else None,
                'vote_counter':
                comment['votes'],
                'instance':
                prefetch[comment['instance']],
                'msg':
                comment['msg'],
                'created':
                comment['created'],
            } for comment in comments]
        ]

    def get_by_instance(self,
                        instance,
                        query_id=None,
                        page=1,
                        querier=None,
                        async=False):
        """
        Gets a list of all comments made on an instance

            :param instance: object whose comments to look up
            :type instance: :class:`db.Model`
            :param query_id: identifier of the paged query
            :type query_id: :class:`long`
            :param page: page to fetch
            :type page: :class:`integer`
            
            :returns: [query_id, [:class:`geovote.models.Comment`]]
        """
        if querier is not None and not querier.is_authenticated():
            querier = None
        if querier is not None and not isinstance(querier, User):
            raise TypeError
        if instance is None:
            return None
        from georemindme.paging import PagedQuery
        from google.appengine.api import datastore
        q = datastore.Query(kind='Comment',
                            filters={
                                'instance =': instance.key(),
                                'deleted =': False
                            })
        q.Order(('created', datastore.Query.DESCENDING))
        p = PagedQuery(q, id=query_id, page_size=7)
        if async:
            from google.appengine.datastore import datastore_query
            q = Comment.all().filter('instance =',
                                     instance).filter('deleted =',
                                                      False).order('-created')
            return p.id, q.run(config=datastore_query.QueryOptions(limit=7))
        comments = p.fetch_page(page)
        from georemindme.funcs import prefetch_refpropsEntity
        prefetch = prefetch_refpropsEntity(comments, 'user', 'instance')
        return [
            p.id,
            [{
                'id':
                comment.key().id(),
                'username':
                prefetch[comment['user']].username,
                'has_voted':
                Vote.objects.user_has_voted(querier, comment.key())
                if querier is not None else None,
                'vote_counter':
                comment['votes'],
                'instance':
                prefetch[comment['instance']],
                'msg':
                comment['msg'],
                'created':
                comment['created'],
            } for comment in comments]
        ]
Example 13
 def DeleteAll(self):
     query = datastore.Query('ranker_node', keys_only=True)
     query.Ancestor(self.parent_key)
     datastore.Delete(list(query.Run()))
Example 14
def get_namespace_keys(app, limit):
    """Get namespace keys."""
    ns_query = datastore.Query('__namespace__', keys_only=True, _app=app)
    return ns_query.Get(limit=limit)
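Examples 11 and 14 differ only in Run versus Get. Either way the result is a list of keys whose name() is the namespace, with the default namespace represented by an unnamed key. A usage sketch:

    # Sketch: turn namespace keys into names ('' marks the default namespace).
    names = [key.name() or '' for key in get_namespace_keys(None, 1000)]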
Example 15
  def split_input(cls, mapper_spec):
    """Splits query into shards without fetching query results.

    Tries its best to split the whole query result set into equal shards.
    Due to the difficulty of making a perfect split, the resulting shards'
    sizes might differ significantly from each other. The actual number of
    shards might also be less than requested (even 1), though it is never
    greater.

    Args:
      mapper_spec: MapperSpec with params containing 'entity_kind'.
        May have 'namespace' in the params as a string containing a single
        namespace. If specified then the input reader will only yield values
        in the given namespace. If 'namespace' is not given then values from
        all namespaces will be yielded. May also have 'batch_size' in the params
        to specify the number of entities to process in each batch.

    Returns:
      A list of InputReader objects of length <= number_of_shards.
    """
    params = mapper_spec.params
    entity_kind_name = params[cls.ENTITY_KIND_PARAM]
    batch_size = int(params.get(cls.BATCH_SIZE_PARAM, cls._BATCH_SIZE))
    shard_count = mapper_spec.shard_count
    namespace = params.get(cls.NAMESPACE_PARAM)
    app = params.get(cls._APP_PARAM)

    if namespace is None:
      # It is difficult to efficiently shard large numbers of namespaces because
      # there can be an arbitrary number of them. So the strategy is:
      # 1. if there are a small number of namespaces in the datastore then
      #    generate one KeyRange per namespace per shard and assign each shard a
      #    KeyRange for every namespace. This should lead to nearly perfect
      #    sharding.
      # 2. if there are a large number of namespaces in the datastore then
      #    generate one NamespaceRange per worker. This can lead to very bad
      #    sharding because namespaces can contain very different numbers of
      #    entities and each NamespaceRange may contain very different numbers
      #    of namespaces.
      namespace_query = datastore.Query("__namespace__",
                                        keys_only=True,
                                        _app=app)
      namespace_keys = namespace_query.Get(
          limit=cls.MAX_NAMESPACES_FOR_KEY_SHARD+1)

      if len(namespace_keys) > cls.MAX_NAMESPACES_FOR_KEY_SHARD:
        ns_ranges = namespace_range.NamespaceRange.split(n=shard_count,
                                                         contiguous=True,
                                                         _app=app)
        return [cls(entity_kind_name,
                    key_ranges=None,
                    ns_range=ns_range,
                    batch_size=batch_size)
                for ns_range in ns_ranges]
      else:
        namespaces = [namespace_key.name() or ""
                      for namespace_key in namespace_keys]
    else:
      namespaces = [namespace]

    return cls._split_input_from_params(
        app, namespaces, entity_kind_name, params, shard_count)
Example 16
    def post(self):
        
        # PARSE CONTEXT AND PARAMETERS
         
        self.response.headers['Content-Type'] = 'application/json'

        path = self.request.path.split("/")        
        if len(path) < 3 or path[2] == '': 
            return self.bad_request(u'Please specify an entity kind\n')
                
        kind = path[2]
        
        # PROCESS INBOUND UPDATES
        
        keys_in = set()
        updates_in = json.loads(self.request.get("updates"))
        
        entities = self.get_config()[ENTITIES]
        if entities.has_key(kind):
            entity_config = entities[kind]
        else:
            entity_config = {}
        
        for update in updates_in:
            key_name_or_id = update[KEY]
            
            keys_in.add(key_name_or_id)
            
            if key_name_or_id[0] in "0123456789":
                key = datastore.Key.from_path(kind, int(key_name_or_id)) # KEY ID
                is_id = True
            else:                
                key = datastore.Key.from_path(kind, key_name_or_id) # KEY NAME
                is_id = False
            
            try: 
                entity = datastore.Get(key)

                if self.update_filter:
                    if not self.update_filter(self.request, kind, entity):
                        logging.error("update on existing entity is filter, key: %s" % key_name_or_id)
                        continue
                    
            except datastore_errors.EntityNotFoundError:
                if is_id:
                    entity = datastore.Entity(kind=kind,id=int(key_name_or_id))
                else:
                    entity = datastore.Entity(kind=kind,name=key_name_or_id)
                
            for attr_name in update:
                if attr_name != KEY and attr_name != TIMESTAMP and attr_name != IS_DIRTY:
                    if entity_config.has_key(attr_name):
                        attr_config = entity_config[attr_name]
                    else:
                        attr_config = None
                        
                    if attr_name == TIMESTAMP:
                        value = datetime.datetime.now()
                    else:
                        value = update[attr_name] 
                        
                    entity[attr_name] = js_to_appengine_value(value, attr_config)

            # update timestamp to current time so that it gets picked up by other clients
            entity[TIMESTAMP] = datetime.datetime.now()                                    

            if self.update_filter:
                if not self.update_filter(self.request, kind, entity):
                    logging.error("updated entity is filtered, key: %s" % key_name_or_id)
                    continue
                    
            datastore.Put(entity)
            
            
        # PROCESS OUTBOUND UPDATES            

        batch_size = int(self.request.get("count"))

        updates_out = []
        
        query = datastore.Query(kind)
            
        f = self.request.get("from") 
        if f: 
            query['%s >= ' % TIMESTAMP] = datetime_from_iso(f)
            
        if self.query_filter:
            self.query_filter(self.request, kind, query)
    
        query.Order(TIMESTAMP)
        
        batch_start_timestamp = appengine_to_js_value(datetime.datetime.now())
            
        entities = query.Get(batch_size, 0)
        
        for entity in entities:
            key = appengine_to_js_value(entity.key())
            
            # do not send back updates that we just received from the same client
            if not key in keys_in:                                
                update = {
                    KEY: key
                }
                
                for field, value in entity.items():                        
                    update[field] = appengine_to_js_value(value)
                            
                updates_out.append(update)
                        
        res = json.dumps({
            "updates": updates_out, 
            "timestamp": batch_start_timestamp,
        })
        
        self.response.out.write(res)
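For reference, a hedged sketch of the inbound payload this handler expects: a JSON list of updates, each carrying the key plus the changed fields (assuming KEY == "key"; a leading digit makes the key a numeric id, anything else a key name, per the branch above):

    # Sketch of the "updates" request parameter (field names invented).
    updates = json.dumps([
        {"key": "1001", "title": "hello"},      # digits -> numeric key id
        {"key": "note-abc", "title": "world"},  # otherwise -> key name
    ])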
Example 17
def _DailyBackupExists():
    expected_backup_name = '%s_%s' % (_BACKUP_PREFIX, _CreateDayString())
    query = datastore.Query(kind='_AE_Backup_Information', keys_only=True)
    query['name ='] = expected_backup_name
    return query.Get(1)
Example 18
 def _get_taskmarker_query(self, namespace=""):
     return datastore.Query("ShardedTaskMarker", namespace=namespace)
Example 19
class ListHelper(object):
    _klass = List

    def get_all_public(self, query_id=None, page=1):
        '''
        Returns all public lists. PAGINATED!

            :param page: page number to show
            :type param: int
            :param query_id: search identifier
            :type query_id: int
        '''
        q = self._klass.all().filter('_vis =',
                                     'public').filter('active =',
                                                      True).order('-modified')
        from georemindme.paging import PagedQuery
        p = PagedQuery(q, id=query_id)
        lists = p.fetch_page(page)
        prefetch_refprops(lists, self._klass.user)
        return [p.id, lists]

    def get_by_id(self, id):
        '''
        Returns the public list with the given ID

            :param id: list identifier
            :type id: :class:`Integer`
            :returns: None or :class:`geolist.models.List`
        '''
        try:
            id = int(id)
        except:
            raise TypeError
        list = self._klass.get_by_id(id)
        return list

    def get_by_name_user(self, name, user):
        list = self._klass.all().filter('user =', user).filter(
            'name =', name).filter('active =', True).get()
        return list

    def get_by_id_querier(self, id, querier):
        '''
        Returns the public list with the given ID

            :param id: list identifier
            :type id: :class:`Integer`
            :returns: None or :class:`geolist.models.List`
        '''
        if not isinstance(querier, User) and not isinstance(
                querier, AnonymousUser):
            raise TypeError()
        list = self.get_by_id(id)
        if list is None:
            return None
        if list.__class__.user.get_value_for_datastore(list) == querier.key():
            return list
        if hasattr(list, '_vis'):
            if list._is_public():
                return list
            elif list._is_shared() and list.user_invited(querier):
                return list
        return None

    def get_by_id_user(self, id, user):
        '''
        Returns the list with the given ID owned by the given user.

            :param id: list identifier
            :type id: :class:`Integer`
            :param user: user
            :type user: :class:`geouser.models.User`
            :returns: None or :class:`geolist.models.List`
        '''
        list = self.get_by_id(id)
        if list is not None:
            if not list.active:
                return None
            if list.__class__.user.get_value_for_datastore(list) == user.key():
                return list
        return None

    def get_list_user_following(self, user, resolve=False, async=False):
        '''
        Returns the lists the user is following

            :param user: user whose lists to look up
            :type user: :class:`geouser.models.User`
        '''
        if not user.is_authenticated():
            return []
        from google.appengine.api import datastore

        if async:
            indexes = ListFollowersIndex.all(keys_only=True).filter(
                'keys =', user.key())
            return indexes.run()
        q = datastore.Query('ListFollowersIndex', {'keys =': user.key()},
                            keys_only=True)
        run = q.Run()
        indexes = [a for a in run]
        lists = model_plus.fetch_parentsKeys(indexes)
        return [list.to_dict(resolve=resolve) for list in lists if list.active]
Example 20
    def get_notifications_timeline(self, query_id=None):
        from models_acc import UserTimelineBase, UserTimeline, UserTimelineSuggest

        def prefetch_timeline(entities):
            # from http://blog.notdot.net/2010/01/ReferenceProperty-prefetching-in-App-Engine
            """
                Loads all the timelines pointed to by _Notification
                entities in a single batch
            """
            from models_acc import UserTimelineSuggest, UserTimeline
            from geovote.models import Comment, Vote
            from geolist.models import List, ListSuggestion
            from geoalert.models import Event, Suggestion
            ref_keys = [x['timeline'] for x in entities]
            timelines = model_plus.get(set(ref_keys))
            timelines = filter(None, timelines)
            # prefetch the references
            timelines = model_plus.prefetch(timelines, UserTimeline.instance,
                                            UserTimeline.user)
            from helpers_acc import _load_ref_instances
            return timelines, _load_ref_instances(timelines)

        from models_utils import _Notification
        if query_id is None:
            query = datastore.Query(kind='_Notification',
                                    filters={'owner =': self.key()})
        if query_id is not None:
            query = datastore.Query(
                kind='_Notification',
                filters={'owner =': self.key()},
                cursor=datastore.datastore_query.Cursor.from_websafe_string(
                    query_id))
        #query.Ancestor(self.key())
        query.Order(('_created', datastore.Query.DESCENDING))
        timelines = query.Get(TIMELINE_PAGE_SIZE)
        if not any(timelines):
            return []
        timelines, instances = prefetch_timeline(timelines)
        from operator import attrgetter
        timelines = sorted(timelines, key=attrgetter('modified'), reverse=True)
        import logging
        for t in timelines:
            a = instances.get(UserTimeline.instance.get_value_for_datastore(t),
                              None)
            if a is None:
                logging.info('INSTANCE: %s' % t.instance.__class__)
        return [
            query.GetCursor().to_websafe_string(),
            [{
                'id':
                timeline.id,
                'created':
                timeline.created,
                'modified':
                timeline.modified,
                'msg':
                timeline.msg,
                'username':
                timeline.user.username,
                'msg_id':
                timeline.msg_id,
                'instance':
                instances.get(
                    UserTimeline.instance.get_value_for_datastore(timeline),
                    timeline.instance),
                'list':
                instances.get(
                    UserTimelineSuggest.list_id.get_value_for_datastore(
                        timeline), timeline.list_id) if isinstance(
                            timeline, UserTimelineSuggest) else None,
                'status':
                timeline.status if hasattr(timeline, 'status') else None,
                'is_private':
                False,
            } for timeline in timelines]
        ]
Example 21
def resave_pastes_task():
    dt = datetime.datetime.utcnow()
    name = 'resave-pastes {}'.format(dt)
    q = 'resave-pastes'
    query = datastore.Query('Paste')
    mapper_library.start_mapping(name, query, resave_paste, queue=q)
Example 22
def update_entity(app,
                  kind,
                  id,
                  data,
                  metadata_entity,
                  user,
                  put_function=None,
                  rebuild_facets=False):
    # Start transaction
    # Get entity
    # Apply changes to entity
    # Add new revision (generate revision id & date)
    # Store transaction
    key = datastore.Key.from_path(kind, id, namespace=app)
    newest_date = datetime.datetime.min
    newest_entity = None
    rev = "1"
    date = datetime.datetime.now()
    revision_list = []
    try:
        entity = datastore.Get(key)
        # Entity was fetched. Find all revisions.
        query = datastore.Query(namespace=app)
        query.Ancestor(key)
        for r in query.Run():
            #logging.info(r.kind() + " " + r.key().name())
            if (r.kind() != "Revision"):
                continue
            assert (r.kind() == "Revision")
            if (r['date'] > newest_date):
                newest_entity = r
                newest_date = r['date']
            if rebuild_facets:
                revision_list.append(r)
        if newest_entity:
            rev = str(int(newest_entity['rev']) + 1)
            date = max(
                date,
                newest_entity['date'] + datetime.timedelta(microseconds=1))
    except datastore_errors.EntityNotFoundError:
        entity = datastore.Entity(kind, name=id, namespace=app)
        assert (entity.key() == key)

    # See if an update is even needed.
    existing_data = {}
    for p in entity.keys():
        if p != 'rev' and p != 'date' and not is_facet_property(p):
            existing_data[p] = entity[p]
    for p in data.keys():
        if data[p] is None and not p in existing_data:
            del data[p]
    changed = False
    for p in data.keys():
        if p not in existing_data:
            changed = True
        elif existing_data[p] != data[p]:
            changed = True
        if changed:
            break

    current_facets = {}
    new_facets = {}
    facets = GetFacetsForApp(app, metadata_entity)

    for p in entity.keys():
        if is_facet_property(p):
            current_facets[p] = entity[p]

    def find_facets(obj):
        for p in facets:
            if p in obj:
                fp = facet_property_name(p)
                value = obj[p]
                if (not value is None and
                    ((fp in current_facets and not value in current_facets[fp])
                     or (not fp in current_facets))):
                    if fp not in new_facets:
                        new_facets[fp] = []
                    if not value in new_facets[fp]:
                        new_facets[fp].append(value)

    find_facets(entity)
    find_facets(data)
    for r in revision_list:
        try:
            # Parse into a local name so the caller's `data` is not
            # clobbered by old revision payloads.
            revision_data = json.loads(r['data'])
            find_facets(revision_data)
        except json.JSONDecodeError:
            logging.error("Could not parse JSON from Revision: " +
                          str(r.key()))

    if not changed and len(new_facets) == 0:
        return

    #logging.info("Facets differ: " + str(new_facets))

    #logging.info("Data differs: " +
    #             str(set(data.items()).difference(set(existing_data.items()))))
    entity.update(data)
    for p in new_facets:
        if p not in entity:
            entity[p] = new_facets[p]
        else:
            entity[p].extend(new_facets[p])
    for p in entity.keys():
        if entity[p] is None:
            # If we instead do a del entity[p] here, the property would be gone.
            # In this special case, we need to always maintain null properties,
            # so clients can sync this state.
            entity[p] = None
    if changed:
        entity['rev'] = rev
        entity['date'] = date
    if put_function:
        put_function(entity)
    else:
        datastore.Put(entity)
    memcache.delete(memcache_key(app, kind, id))
    if changed:
        change = Revision(key=datastore.Key.from_path("Revision",
                                                      rev,
                                                      parent=entity.key(),
                                                      namespace=app),
                          type=kind,
                          parent_id=entity.key().id_or_name(),
                          data=json.dumps(data, use_decimal=True),
                          rev=rev,
                          date=date,
                          user=user)
        if put_function:
            put_function(change)
        else:
            change.put()
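Reading the history back uses the same kindless ancestor query as update_entity itself; a minimal sketch (get_revisions is a hypothetical helper):

    # Sketch: list every stored Revision under an entity.
    def get_revisions(app, kind, id):
        key = datastore.Key.from_path(kind, id, namespace=app)
        query = datastore.Query(namespace=app)
        query.Ancestor(key)
        return [r for r in query.Run() if r.kind() == 'Revision']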
Example 23
def convert_peelings_task():
    dt = datetime.datetime.utcnow()
    name = 'convert-peelings {}'.format(dt)
    q = 'convert-peelings'
    query = datastore.Query('pastes_paste')
    mapper_library.start_mapping(name, query, convert_peeling, queue=q)
Example 24
    def _build_gae_query(self):
        """ Build and return the Datstore Query object. """
        combined_filters = []

        query_kwargs = {}

        if self.keys_only:
            query_kwargs["keys_only"] = self.keys_only
        elif self.projection:
            query_kwargs["projection"] = self.projection

        query = Query(
            self.db_table,
            **query_kwargs
        )

        if has_concrete_parents(self.model) and not self.model._meta.proxy:
            query["class ="] = self.model._meta.db_table

        DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))
        for column, op, value in self.where:
            if column == self.pk_col:
                column = "__key__"

            final_op = OPERATORS_MAP.get(op)
            if final_op is None:
                if op in REQUIRES_SPECIAL_INDEXES:
                    add_special_index(self.model, column, op) #Add the index if we can (e.g. on dev_appserver)

                    if op not in special_indexes_for_column(self.model, column):
                        raise RuntimeError("There is a missing index in your djangaeidx.yaml - \n\n{0}:\n\t{1}: [{2}]".format(
                            self.model, column, op)
                        )

                    indexer = REQUIRES_SPECIAL_INDEXES[op]
                    column = indexer.indexed_column_name(column)
                    value = indexer.prep_value_for_query(value)
                    query["%s =" % column] = value
                else:
                    if op == "in":
                        combined_filters.append((column, op, value))
                    elif op == "gt_and_lt":
                        combined_filters.append((column, op, value))
                    elif op == "isnull":
                        query["%s =" % column] = None
                    elif op == "startswith":
                        #You can emulate starts with by adding the last unicode char
                        #to the value, then doing <=. Genius.
                        query["%s >=" % column] = value
                        if isinstance(value, str):
                            value = value.decode("utf-8")
                        value += u'\ufffd'
                        query["%s <=" % column] = value
                    else:
                        raise NotImplementedError("Unimplemented operator {0}".format(op))
            else:
                query["%s %s" % (column, final_op)] = value

        ordering = []
        for order in self.ordering:
            if isinstance(order, int):
                direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
                order = self.queried_fields[0]
            else:
                direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
                order = order.lstrip("-")

            if order == self.model._meta.pk.column:
                order = "__key__"
            ordering.append((order, direction))

        if combined_filters:
            queries = [ query ]
            for column, op, value in combined_filters:
                new_queries = []
                for query in queries:
                    if op == "in":
                        for val in value:
                            new_query = datastore.Query(self.model._meta.db_table)
                            new_query.update(query)
                            new_query["%s =" % column] = val
                            new_queries.append(new_query)
                    elif op == "gt_and_lt":
                        for tmp_op in ("<", ">"):
                            new_query = datastore.Query(self.model._meta.db_table)
                            new_query.update(query)
                            new_query["%s %s" % (column, tmp_op)] = value
                            new_queries.append(new_query)
                queries = new_queries

            query = datastore.MultiQuery(queries, ordering)
        elif ordering:
            query.Order(*ordering)
        return query
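The startswith emulation above deserves a standalone look: a prefix match becomes a closed range scan from the prefix up to the prefix plus a maximal code point. A sketch with an invented kind and property:

    # Sketch: name LIKE 'Jo%' expressed as a Datastore range scan.
    query = datastore.Query('Person')
    query['name >='] = u'Jo'
    query['name <='] = u'Jo' + u'\ufffd'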
Example 25
    def get(self):
        path = self.request.path.split("/")

        self.response.headers['Content-Type'] = 'text/xml'

        if SECRET_KEY == CHANGE_THIS:
            return self.unauthorized(
                "Please change the default secret key in key.py")

        if self.request.get("secret_key") != SECRET_KEY:
            return self.unauthorized()

        if len(path) < 3 or path[2] == '':
            return self.bad_request("Please specify an entity kind")

        kind = path[2]

        self.response.out.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
        self.response.out.write(u'<updates>\n')

        query = datastore.Query(kind)

        timestamp_field = self.request.get("timestamp")
        if not timestamp_field:
            timestamp_field = DEFAULT_TIMESTAMP_FIELD

        batch_size = self.request.get("count")
        if not batch_size:
            batch_size = DEFAULT_BATCH_SIZE
        else:
            batch_size = int(batch_size)

        f = self.request.get("from")
        if f:
            query['%s >' % timestamp_field] = from_iso(f)

        query.Order(timestamp_field)

        entities = query.Get(batch_size, 0)

        for entity in entities:
            self.response.out.write(
                u'    <%s key="%s">\n' %
                (kind, ae_to_rocket(TYPE_KEY, entity.key())))

            for field, value in entity.items():
                if isinstance(value, list):
                    if len(value) > 0 and value[0] != None:
                        field_type = get_type(value[0])
                        self.response.out.write(
                            u'        <%s type="%s" list="true">\n' %
                            (field, field_type))
                        for item in value:
                            self.response.out.write(
                                u"            <item>%s</item>\n" %
                                ae_to_rocket(field_type, item))
                        self.response.out.write(u'</%s>\n' % field)
                else:
                    if value != None:
                        if field == timestamp_field:
                            field_type = TYPE_TIMESTAMP
                        else:
                            field_type = get_type(value)

                        self.response.out.write(
                            u'        <%s type="%s">%s</%s>\n' %
                            (field, field_type, ae_to_rocket(
                                field_type, value), field))

            self.response.out.write(u'    </%s>\n' % kind)

        self.response.out.write(u'</updates>')