Example #1
    def _MinimalQueryInfo(self, query):
        """Extract the minimal set of information for query matching.

        Args:
          query: datastore_pb.Query instance from which to extract info.

        Returns:
          datastore_pb.Query instance suitable for matching against when
          validating cursors.
        """
        query_info = datastore_pb.Query()
        query_info.set_app(query.app())

        for filt in query.filter_list():
            query_info.filter_list().append(filt)
        for order in query.order_list():
            query_info.order_list().append(order)

        if query.has_ancestor():
            query_info.mutable_ancestor().CopyFrom(query.ancestor())

        for attr in ('kind', 'name_space', 'search_query'):
            query_has_attr = getattr(query, 'has_%s' % attr)
            query_attr = getattr(query, attr)
            query_info_set_attr = getattr(query_info, 'set_%s' % attr)
            if query_has_attr():
                query_info_set_attr(query_attr())

        return query_info
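
The query info extracted above is what cursors are validated against; Example #2 below passes it to self._ValidateQuery, which is not shown in these snippets. A minimal sketch of such a check is given here, assuming the stub compares protocol buffers with Equals() and reports mismatches via datastore_errors.BadRequestError; both details are assumptions, not taken from this source.

    def _ValidateQuery(self, query, query_info):
        """Raise if `query` does not match the query a cursor was built for."""
        # Assumed behavior: the query info stored in the cursor must equal the
        # minimal info derived from the incoming query.
        if not query_info.Equals(self._MinimalQueryInfo(query)):
            raise datastore_errors.BadRequestError(
                'Cursor does not match the query it is used with.')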
Example #2
    def _DecodeCompiledCursor(self, query, compiled_cursor):
        """Converts a compiled_cursor into a cursor_entity.

        Args:
          query: datastore_pb.Query the cursor belongs to, used for validation.
          compiled_cursor: datastore_pb.CompiledCursor to decode.

        Returns:
          (cursor_entity, inclusive): a datastore.Entity and if it should be
          included in the result set.
        """
        assert len(compiled_cursor.position_list()) == 1

        position = compiled_cursor.position(0)
        entity_pb = datastore_pb.EntityProto()
        (query_info_encoded,
         entity_encoded) = position.start_key().split(_CURSOR_CONCAT_STR, 1)
        query_info_pb = datastore_pb.Query()
        query_info_pb.ParseFromString(query_info_encoded)
        self._ValidateQuery(query, query_info_pb)

        entity_pb.ParseFromString(entity_encoded)
        return (datastore.Entity._FromPb(entity_pb,
                                         True), position.start_inclusive())
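
The decode path splits position.start_key() on _CURSOR_CONCAT_STR into an encoded minimal query and an encoded entity. For completeness, a sketch of the inverse direction follows; the helper name and the add_position / set_start_key / set_start_inclusive accessors are assumptions based on the stub's protocol buffer conventions, not confirmed by this source.

    def _EncodeCompiledCursor(self, query, last_entity_pb, inclusive, compiled_cursor):
        """Build a compiled cursor that _DecodeCompiledCursor can split again."""
        # Assumed layout: a single position whose start_key joins the minimal
        # query info and the last returned entity with _CURSOR_CONCAT_STR.
        start_key = _CURSOR_CONCAT_STR.join(
            (self._MinimalQueryInfo(query).Encode(), last_entity_pb.Encode()))
        position = compiled_cursor.add_position()
        position.set_start_key(start_key)
        position.set_start_inclusive(inclusive)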
Example #3
    def _Dynamic_RunQuery(self, query, query_result):
        client = self._GetRiakClient()
        kind = query.kind()
        keys_only = query.keys_only()
        filters = query.filter_list()
        orders = query.order_list()
        offset = query.offset()
        limit = query.limit()
        namespace = query.name_space()
        logging.debug('offset: %d limit: %d' % (offset, limit))

        # query history
        clone = datastore_pb.Query()
        clone.CopyFrom(query)
        clone.clear_hint()
        # FIXME: use a hashable object for history
        clone.__hash__ = lambda: hash(str(clone))
        if clone in self.__query_history:
            self.__query_history[clone] += 1
        else:
            self.__query_history[clone] = 1
        
        entity_bucket_name = '%s_%s_%s' % (self.__app_id, namespace, kind)
        entity_bucket = client.bucket(entity_bucket_name)
        binary_bucket_name = entity_bucket_name + _BINARY_BUCKET_SUFFIX
        binary_bucket = client.bucket(binary_bucket_name)
        
        operators = {datastore_pb.Query_Filter.LESS_THAN:             '<',
                     datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL:    '<=',
                     datastore_pb.Query_Filter.GREATER_THAN:          '>',
                     datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
                     datastore_pb.Query_Filter.EQUAL:                 '==',
                     }

        queue = Queue.Queue()
        index_key_sets = []
        index_query_threads = []
        for filt in filters:
            assert filt.op() != datastore_pb.Query_Filter.IN
            prop = filt.property(0).name().decode('utf-8')
            op = operators[filt.op()]
            filter_val_list = [datastore_types.FromPropertyPb(filter_prop)
                                    for filter_prop in filt.property_list()]
            filter_val = self.__get_filter_value_for_query(filter_val_list[0])
            
            # spawn new thread to do the index query
            thd = threading.Thread(target=self.__filter_to_index_query, 
                                   args=(entity_bucket_name, prop, op, filter_val, queue))
            index_query_threads.append(thd)
            thd.start()

        # wait for the index query threads to finish
        for thd in index_query_threads:
            thd.join()
        
        # get the index key sets from the shared queue
        while not queue.empty():
            index_key_sets.append(queue.get())

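        # An entity must satisfy every filter, so only keys present in all of
        # the per-filter index result sets are kept (set intersection below).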
        if index_key_sets:
            mapreduce_inputs = reduce(lambda x, y: x.intersection(y), index_key_sets)
        else:
            mapreduce_inputs = set()
        
        logging.info('mapreduce input: %d keys' % len(mapreduce_inputs))
        if filters and not mapreduce_inputs:
            results = []
        else:
            # key inputs to MapReduce Job
            if not mapreduce_inputs:
                riak_query = client.add(entity_bucket_name)
            else:
                riak_query = riak.RiakMapReduce(client)
                for input_key in mapreduce_inputs:
                    riak_query.add(entity_bucket_name, input_key)
    
            riak_query.map(_JS_MAP_FUNCTION)
            
            for order in orders:
                prop = order.property().decode('utf-8')
                if order.direction() == datastore_pb.Query_Order.DESCENDING:
                    direction = 'desc'
                else:
                    direction = 'asc'
                logging.debug('sort(%s, %s)' % (prop, direction))
                riak_query.reduce(_JS_REDUCE_SORT_FUNCTION, 
                                  {'arg': {'by': prop, 'order': direction}})
    
            if limit:
                # reduce phase for applying limit
                start = offset
                end = offset + limit
                if end > len(mapreduce_inputs) and filters:
                    end = 0
                logging.debug('slice(start: %d, end: %d)' % (start, end))
                riak_query.reduce('Riak.reduceSlice', {'arg': [start, end]})
    
            for phase in riak_query._phases:
                logging.debug(phase.to_array())
            
            results = []
            for result in riak_query.run():
                metadata, riak_entity = result
                key = metadata['X-Riak-Meta-Key']
                key = datastore_types.Key(encoded=key)
                entity = datastore.Entity(kind=kind, 
                                          parent=key.parent(), 
                                          name=key.name(), 
                                          id=key.id())
                for property_name, property_value in riak_entity.iteritems():
                    if property_name == '__key__':
                        continue
                    try:
                        property_type_name = metadata['X-Riak-Meta-%s' % property_name.capitalize()]
                    except KeyError:
                        property_type_name = metadata['X-Riak-Meta-%s' % property_name]
                    property_value = self.__create_value_for_riak_value(property_type_name, 
                                                                        property_value, 
                                                                        binary_bucket)
                    entity[property_name] = property_value
                results.append(entity)

        query.set_app(self.__app_id)
        datastore_types.SetNamespace(query, namespace)
        encoded = datastore_types.EncodeAppIdNamespace(self.__app_id, namespace)

        #cursor = _Cursor(query, results)
        #self.__queries[cursor.cursor] = cursor
    
        #if query.has_count():
        #    count = query.count()
        #elif query.has_limit():
        #    count = query.limit()
        #else:
        #    count = _BATCH_SIZE
    
        #cursor.PopulateQueryResult(query_result, count,
        #                             query.offset(), compile=query.compile())

        query_result.result_list().extend(r._ToPb() for r in results)
        query_result.set_skipped_results(len(results))
        query_result.set_keys_only(keys_only)

        if query.compile():
            compiled_query = query_result.mutable_compiled_query()
            compiled_query.set_keys_only(query.keys_only())
            compiled_query.mutable_primaryscan().set_index_name(query.Encode())
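
The filter handling in Example #3 is a fan-out/fan-in pattern: one thread per filter resolves an index lookup, the resulting key sets are collected through a shared Queue, and the candidate keys are their intersection. The self-contained sketch below illustrates that pattern on its own; _lookup_keys_for_filter and its fake index data are stand-ins invented for illustration (the original delegates to __filter_to_index_query against Riak).

    import Queue
    import threading

    def _lookup_keys_for_filter(prop, op, value, out_queue):
        # Stand-in for __filter_to_index_query: a real implementation would run
        # a Riak index query for (prop, op, value) and put the matching keys
        # onto the shared queue.
        fake_index = {('height', '>=', 10): set(['k1', 'k2', 'k3']),
                      ('color', '==', 'red'): set(['k2', 'k3', 'k4'])}
        out_queue.put(fake_index.get((prop, op, value), set()))

    def _candidate_keys(filters):
        out_queue = Queue.Queue()
        threads = [threading.Thread(target=_lookup_keys_for_filter,
                                    args=(prop, op, value, out_queue))
                   for prop, op, value in filters]
        for thd in threads:
            thd.start()
        for thd in threads:
            thd.join()
        key_sets = []
        while not out_queue.empty():
            key_sets.append(out_queue.get())
        # An entity must satisfy every filter, hence the intersection.
        return reduce(lambda x, y: x & y, key_sets) if key_sets else set()

    print _candidate_keys([('height', '>=', 10), ('color', '==', 'red')])
    # prints the keys matching both filters: set(['k2', 'k3']) (order may vary)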