Exemple #1
0
    def _Dynamic_RunQuery(self, query, query_result):
        """Run a datastore query through Riak and fill in query_result.

        Every filter of the query is handed to self.__filter_to_index_query
        on its own thread, together with a shared queue. Whatever the threads
        leave on that queue is intersected and the surviving keys become the
        inputs of a Riak MapReduce job. Sort orders and offset/limit are
        appended to the job as reduce phases, and the job output is converted
        into datastore entities.

        All entities are written to query_result in one batch; no cursor is
        registered for follow-up fetches.

        Args:
            query: query protocol buffer to execute. Its app id and namespace
                are set as a side effect.
            query_result: result protocol buffer that receives the entities,
                the keys_only flag and, if requested, the compiled query.

        Raises:
            KeyError: if a filter uses an operator missing from the operator
                table below.
            AssertionError: if a filter uses the IN operator.
        """
        client = self._GetRiakClient()
        kind = query.kind()
        keys_only = query.keys_only()
        filters = query.filter_list()
        orders = query.order_list()
        offset = query.offset()
        limit = query.limit()
        namespace = query.name_space()
        logging.debug('offset: %d limit: %d', offset, limit)

        # Count how often each distinct query (hint stripped) has been run.
        clone = datastore_pb.Query()
        clone.CopyFrom(query)
        clone.clear_hint()
        # FIXME: use a hashable object for history
        clone.__hash__ = lambda: hash(str(clone))
        if clone in self.__query_history:
            self.__query_history[clone] += 1
        else:
            self.__query_history[clone] = 1

        entity_bucket_name = '%s_%s_%s' % (self.__app_id, namespace, kind)
        # NOTE(review): entity_bucket is never read below; the call is kept
        # in case client.bucket() has a side effect -- confirm.
        entity_bucket = client.bucket(entity_bucket_name)
        binary_bucket_name = entity_bucket_name + _BINARY_BUCKET_SUFFIX
        binary_bucket = client.bucket(binary_bucket_name)

        operators = {
            datastore_pb.Query_Filter.LESS_THAN: '<',
            datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: '<=',
            datastore_pb.Query_Filter.GREATER_THAN: '>',
            datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
            datastore_pb.Query_Filter.EQUAL: '==',
        }

        # One index query per filter, each on its own thread; the threads
        # receive the shared queue to hand their result back.
        queue = Queue.Queue()
        index_query_threads = []
        for filt in filters:
            assert filt.op() != datastore_pb.Query_Filter.IN
            prop = filt.property(0).name().decode('utf-8')
            op = operators[filt.op()]
            filter_val_list = [datastore_types.FromPropertyPb(filter_prop)
                               for filter_prop in filt.property_list()]
            # Only the first value of the filter is used for the index query.
            filter_val = self.__get_filter_value_for_query(filter_val_list[0])

            thd = threading.Thread(
                target=self.__filter_to_index_query,
                args=(entity_bucket_name, prop, op, filter_val, queue))
            index_query_threads.append(thd)
            thd.start()

        # Wait for every index query before draining the queue.
        for thd in index_query_threads:
            thd.join()

        # NOTE(review): if a worker thread fails before putting its result,
        # the intersection below silently runs over fewer sets than there
        # are filters -- confirm __filter_to_index_query always reports.
        index_key_sets = []
        while not queue.empty():
            index_key_sets.append(queue.get())

        # A key must satisfy every filter to stay a candidate.
        if index_key_sets:
            mapreduce_inputs = reduce(lambda x, y: x.intersection(y),
                                      index_key_sets)
        else:
            mapreduce_inputs = set()

        logging.info('mapreduce input: %d keys', len(mapreduce_inputs))

        results = []
        # With filters present, an empty candidate set means nothing can
        # match, so the MapReduce job is skipped altogether.
        if mapreduce_inputs or not filters:
            if mapreduce_inputs:
                riak_query = riak.RiakMapReduce(client)
                for input_key in mapreduce_inputs:
                    riak_query.add(entity_bucket_name, input_key)
            else:
                # No filters: use the entity bucket itself as the job input.
                riak_query = client.add(entity_bucket_name)

            riak_query.map(_JS_MAP_FUNCTION)

            # One sort reduce phase per requested order.
            for order in orders:
                prop = order.property().decode('utf-8')
                # Enum values are plain ints: compare by value, not identity.
                if order.direction() == datastore_pb.Query_Order.DESCENDING:
                    direction = 'desc'
                else:
                    direction = 'asc'
                logging.debug('sort(%s, %s)', prop, direction)
                riak_query.reduce(_JS_REDUCE_SORT_FUNCTION,
                                  {'arg': {'by': prop, 'order': direction}})

            if limit:
                # Reduce phase applying offset/limit as a slice.
                start = offset
                end = offset + limit
                # NOTE(review): when the slice would run past the candidate
                # keys the end is reset to 0; presumably meant as "no upper
                # bound" -- confirm against Riak.reduceSlice semantics.
                if filters and end > len(mapreduce_inputs):
                    end = 0
                logging.debug('slice(start: %d, end:%d)', start, end)
                riak_query.reduce('Riak.reduceSlice', {'arg': [start, end]})

            for phase in riak_query._phases:
                logging.debug(phase.to_array())

            # Rebuild a datastore entity from every (metadata, data) pair.
            for metadata, riak_entity in riak_query.run():
                encoded_key = metadata['X-Riak-Meta-Key']
                key = datastore_types.Key(encoded=encoded_key)
                entity = datastore.Entity(kind=kind,
                                          parent=key.parent(),
                                          name=key.name(),
                                          id=key.id())
                for property_name, property_value in riak_entity.iteritems():
                    if property_name == '__key__':
                        continue
                    # The type name is stored under a per-property metadata
                    # header; try the capitalized spelling first, then the
                    # property name as-is.
                    try:
                        property_type_name = metadata[
                            'X-Riak-Meta-%s' % property_name.capitalize()]
                    except KeyError:
                        property_type_name = metadata[
                            'X-Riak-Meta-%s' % property_name]
                    entity[property_name] = self.__create_value_for_riak_value(
                        property_type_name, property_value, binary_bucket)
                results.append(entity)

        query.set_app(self.__app_id)
        datastore_types.SetNamespace(query, namespace)
        # NOTE(review): `encoded` is never read; kept in case the call
        # validates its arguments -- confirm and drop.
        encoded = datastore_types.EncodeAppIdNamespace(self.__app_id, namespace)

        # No _Cursor is created here: every result is returned in this one
        # response and batched fetching is not supported.
        query_result.result_list().extend(r._ToPb() for r in results)
        # NOTE(review): this reports the number of *returned* entities as
        # skipped results; the query offset looks like the intended value --
        # confirm before changing.
        query_result.set_skipped_results(len(results))
        query_result.set_keys_only(keys_only)

        if query.compile():
            compiled_query = query_result.mutable_compiled_query()
            compiled_query.set_keys_only(query.keys_only())
            compiled_query.mutable_primaryscan().set_index_name(query.Encode())
Exemple #2
0
    def _Dynamic_RunQuery(self, query, query_result):
        """Run a datastore query by scanning a table and filtering in memory.

        The 'entity' column of the table named after the query's kind is
        scanned through the Thrift client, the 'entity:proto' cells are
        decoded into entities, and filters and sort orders are then applied
        client-side. The sorted entities are wrapped in a _Cursor, which is
        registered in self.__queries and used to populate query_result.

        The scanner is closed before the client, and the client is closed as
        soon as the scan is over (nothing after the scan uses it), whether
        the scan succeeded or not.

        Args:
            query: query protocol buffer to execute. Its app id and namespace
                are set as a side effect.
            query_result: result protocol buffer to populate. It is left
                untouched when the scan raises ClientException.

        Raises:
            KeyError: if a filter uses an operator missing from the operator
                table below.
            AssertionError: if a filter uses the IN operator.
        """
        client = self._GetThriftClient()
        kind = query.kind()
        keys_only = query.keys_only()
        filters = query.filter_list()
        orders = query.order_list()
        offset = query.offset()
        limit = query.limit()
        namespace = query.name_space()

        # Filters and orders are applied client-side, so the scan cannot be
        # truncated when either is present (row_limit 0 presumably means
        # "no limit" -- confirm against ScanSpec).
        if filters or orders:
            row_limit = 0
        else:
            row_limit = offset + limit

        scanner_id = None
        total_cells = []
        try:
            try:
                # NOTE(review): the namespace handle is never closed
                # explicitly; presumably client.close() releases it.
                ns = client.open_namespace(
                    '%s/%s' % (self.__app_id, namespace))
                scanner_id = client.open_scanner(
                    ns, kind,
                    ScanSpec(columns=['entity'],
                             row_limit=row_limit,
                             revs=1,
                             keys_only=keys_only), True)

                # Drain the scanner; an empty batch marks the end.
                while True:
                    cells = client.next_cells(scanner_id)
                    if not cells:
                        break
                    total_cells += cells
            except ClientException:
                log.warning('No data for %s' % kind)
                return
            finally:
                # Runs before the client is closed below, also on the error
                # path. Compare against None so a scanner id of 0 is closed.
                if scanner_id is not None:
                    client.close_scanner(scanner_id)
        finally:
            client.close()

        # Group the cells by row key.
        key_cell_dict = {}
        for cell in total_cells:
            key_cell_dict.setdefault(cell.key.row, []).append(cell)

        # Decode the 'entity:proto' cell of every row and stamp the row's
        # key onto the decoded entity.
        pb_entities = []
        for key in key_cell_dict:
            key_obj = datastore_types.Key(encoded=key)
            key_pb = key_obj._ToPb()
            for cell in key_cell_dict[key]:
                if (cell.key.column_family == 'entity'
                        and cell.key.column_qualifier == 'proto'):
                    entity_proto = entity_pb.EntityProto(str(cell.value))
                    entity_proto.mutable_key().CopyFrom(key_pb)
                    pb_entities.append(entity_proto)

        results = [datastore.Entity.FromPb(pb) for pb in pb_entities]

        query.set_app(self.__app_id)
        datastore_types.SetNamespace(query, namespace)
        # NOTE(review): `encoded` is never read; kept in case the call
        # validates its arguments -- confirm and drop.
        encoded = datastore_types.EncodeAppIdNamespace(self.__app_id,
                                                       namespace)

        operators = {
            datastore_pb.Query_Filter.LESS_THAN: '<',
            datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: '<=',
            datastore_pb.Query_Filter.GREATER_THAN: '>',
            datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
            datastore_pb.Query_Filter.EQUAL: '==',
        }

        def has_prop_indexed(entity, prop):
            """Returns True if prop is in the entity and is indexed."""
            if prop in datastore_types._SPECIAL_PROPERTIES:
                return True
            elif prop in entity.unindexed_properties():
                return False

            values = entity.get(prop, [])
            if not isinstance(values, (tuple, list)):
                values = [values]

            # One value of a non-raw type is enough to count as indexed.
            for value in values:
                if type(value) not in datastore_types._RAW_PROPERTY_TYPES:
                    return True
            return False

        for filt in filters:
            assert filt.op() != datastore_pb.Query_Filter.IN

            prop = filt.property(0).name().decode('utf-8')
            op = operators[filt.op()]

            filter_val_list = [
                datastore_types.FromPropertyPb(filter_prop)
                for filter_prop in filt.property_list()
            ]

            def passes_filter(entity):
                """Returns True if the entity passes the filter, False otherwise.

                The filter being evaluated is filt, the current filter that
                we're on in the list of filters in the query. It reads prop,
                op and filter_val_list from the enclosing loop iteration, so
                it must be applied before the loop moves on.
                """
                if not has_prop_indexed(entity, prop):
                    return False

                try:
                    entity_vals = datastore._GetPropertyValue(entity, prop)
                except KeyError:
                    entity_vals = []

                if not isinstance(entity_vals, list):
                    entity_vals = [entity_vals]

                for fixed_entity_val in entity_vals:
                    fixed_entity_type = self._PROPERTY_TYPE_TAGS.get(
                        fixed_entity_val.__class__)
                    for filter_val in filter_val_list:
                        filter_type = self._PROPERTY_TYPE_TAGS.get(
                            filter_val.__class__)
                        if fixed_entity_type == filter_type:
                            comp = u'%r %s %r' % (fixed_entity_val, op,
                                                  filter_val)
                        elif op != '==':
                            # Different types: inequality filters compare
                            # the type tags instead of the values.
                            comp = '%r %s %r' % (fixed_entity_type, op,
                                                 filter_type)
                        else:
                            continue

                        logging.log(logging.DEBUG - 1,
                                    'Evaling filter expression "%s"', comp)

                        # SECURITY(review): eval() of repr()-built text;
                        # only acceptable while all values are trusted.
                        # NOTE(review): the NotImplementedError comparison
                        # was probably meant to be NotImplemented.
                        try:
                            ret = eval(comp)
                            if ret and ret != NotImplementedError:
                                return True
                        except TypeError:
                            pass

                return False

            # Evaluate eagerly: passes_filter closes over this iteration's
            # loop variables, which are rebound on the next pass.
            results = [candidate for candidate in results
                       if passes_filter(candidate)]

        # Entities lacking an indexed value for an order property are
        # dropped from the result.
        for order in orders:
            prop = order.property().decode('utf-8')
            results = [
                entity for entity in results if has_prop_indexed(entity, prop)
            ]

        def order_compare_properties(x, y):
            """Return a negative, zero or positive number depending on whether
            property value x is considered smaller than, equal to, or larger
            than property value y. If x and y are different types, they're
            compared based on the type ordering used in the real datastore,
            which is based on the tag numbers in the PropertyValue PB.
            """
            if isinstance(x, datetime.datetime):
                x = datastore_types.DatetimeToTimestamp(x)
            if isinstance(y, datetime.datetime):
                y = datastore_types.DatetimeToTimestamp(y)

            x_type = self._PROPERTY_TYPE_TAGS.get(x.__class__)
            y_type = self._PROPERTY_TYPE_TAGS.get(y.__class__)

            if x_type == y_type:
                try:
                    return cmp(x, y)
                except TypeError:
                    return 0
            else:
                return cmp(x_type, y_type)

        def order_compare_entities(a, b):
            """Return a negative, zero or positive number depending on whether
            entity a is considered smaller than, equal to, or larger than b,
            according to the query's orderings. Ties on every ordering are
            broken by comparing the entity keys.
            """
            for o in orders:
                prop = o.property().decode('utf-8')

                # Enum values are plain ints: compare by value, not identity.
                reverse = (o.direction() ==
                           datastore_pb.Query_Order.DESCENDING)

                # A multi-valued property is represented by the value that
                # sorts first in the requested direction.
                a_val = datastore._GetPropertyValue(a, prop)
                if isinstance(a_val, list):
                    a_val = sorted(a_val,
                                   order_compare_properties,
                                   reverse=reverse)[0]

                b_val = datastore._GetPropertyValue(b, prop)
                if isinstance(b_val, list):
                    b_val = sorted(b_val,
                                   order_compare_properties,
                                   reverse=reverse)[0]

                cmped = order_compare_properties(a_val, b_val)

                if reverse:
                    cmped = -cmped

                if cmped != 0:
                    return cmped

            return cmp(a.key(), b.key())

        results.sort(order_compare_entities)

        cursor = _Cursor(query, results, order_compare_entities)
        self.__queries[cursor.cursor] = cursor

        # Batch size: explicit count first, then the limit, then the default.
        if query.has_count():
            count = query.count()
        elif query.has_limit():
            count = query.limit()
        else:
            count = _BATCH_SIZE

        cursor.PopulateQueryResult(query_result,
                                   count,
                                   query.offset(),
                                   compile=query.compile())

        if query.compile():
            compiled_query = query_result.mutable_compiled_query()
            compiled_query.set_keys_only(query.keys_only())
            compiled_query.mutable_primaryscan().set_index_name(query.Encode())
Exemple #3
0
    def _Dynamic_RunQuery(self, query, query_result):
        """Run a datastore query by scanning a table and filtering in memory.

        The table named '<app id>_<kind>' is scanned, the cells of every row
        in the 'props' column family are unpickled into entity properties,
        and filters and sort orders are then applied client-side. The sorted
        entities are wrapped in a _Cursor, which is registered in
        self.__queries and used to populate query_result.

        Args:
            query: query protocol buffer to execute. Its app id and namespace
                are set as a side effect.
            query_result: result protocol buffer to populate.

        Raises:
            KeyError: if a filter uses an operator missing from the operator
                table below.
            AssertionError: if a filter uses the IN operator.
        """
        kind = query.kind()
        # NOTE(review): keys_only is read but never used in this method.
        keys_only = query.keys_only()
        filters = query.filter_list()
        orders = query.order_list()
        offset = query.offset()
        limit = query.limit()
        namespace = query.name_space()
        #predicate = query.predicate()

        # NOTE(review): the namespace is not part of the table name, so all
        # namespaces of an app appear to share one table per kind -- confirm.
        table_name = str('%s_%s' % (self._app_id, kind))
        table = self._client.open_table(table_name)
        scan_spec_builder = ht.ScanSpecBuilder()
        scan_spec_builder.set_max_versions(1)
        # Filters and orders are applied client-side, so the scan cannot be
        # truncated when either is present (a row limit of 0 presumably
        # means "no limit" -- confirm against ScanSpecBuilder).
        if filters or orders:
            scan_spec_builder.set_row_limit(0)
        else:
            scan_spec_builder.set_row_limit(offset + limit)

        # Fetch the hypertable cells and group them by row key.
        key_cell_dict = {}
        for cell in table.create_scanner(scan_spec_builder):
            key_cell_dict.setdefault(cell.row_key, []).append(cell)

        results = []
        for key in key_cell_dict:
            key_obj = datastore_types.Key(encoded=key)
            entity = datastore.Entity(kind,
                                      _app=self._app_id,
                                      name=key_obj.name(),
                                      id=key_obj.id())
            for cell in key_cell_dict[key]:
                if cell.column_family == 'props':
                    # SECURITY(review): pickle.loads() executes arbitrary
                    # code on crafted input; only safe while every writer to
                    # this table is trusted.
                    entity[cell.column_qualifier] = pickle.loads(cell.value)
            results.append(entity)

        query.set_app(self._app_id)
        datastore_types.SetNamespace(query, namespace)
        # NOTE(review): `encoded` is never read; kept in case the call
        # validates its arguments -- confirm and drop.
        encoded = datastore_types.EncodeAppIdNamespace(self._app_id, namespace)

        operators = {
            datastore_pb.Query_Filter.LESS_THAN: '<',
            datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: '<=',
            datastore_pb.Query_Filter.GREATER_THAN: '>',
            datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: '>=',
            datastore_pb.Query_Filter.EQUAL: '==',
        }

        def has_prop_indexed(entity, prop):
            """Returns True if prop is in the entity and is indexed."""
            if prop in datastore_types._SPECIAL_PROPERTIES:
                return True
            elif prop in entity.unindexed_properties():
                return False

            values = entity.get(prop, [])
            if not isinstance(values, (tuple, list)):
                values = [values]

            # One value of a non-raw type is enough to count as indexed.
            for value in values:
                if type(value) not in datastore_types._RAW_PROPERTY_TYPES:
                    return True
            return False

        for filt in filters:
            assert filt.op() != datastore_pb.Query_Filter.IN

            prop = filt.property(0).name().decode('utf-8')
            op = operators[filt.op()]

            filter_val_list = [
                datastore_types.FromPropertyPb(filter_prop)
                for filter_prop in filt.property_list()
            ]

            def passes_filter(entity):
                """Returns True if the entity passes the filter, False otherwise.

                The filter being evaluated is filt, the current filter that
                we're on in the list of filters in the query. It reads prop,
                op and filter_val_list from the enclosing loop iteration, so
                it must be applied before the loop moves on.
                """
                log.debug('filter check for entity: %r' % entity)
                if not has_prop_indexed(entity, prop):
                    return False

                try:
                    entity_vals = datastore._GetPropertyValue(entity, prop)
                except KeyError:
                    entity_vals = []

                if not isinstance(entity_vals, list):
                    entity_vals = [entity_vals]

                for fixed_entity_val in entity_vals:
                    fixed_entity_type = self._PROPERTY_TYPE_TAGS.get(
                        fixed_entity_val.__class__)
                    for filter_val in filter_val_list:
                        filter_type = self._PROPERTY_TYPE_TAGS.get(
                            filter_val.__class__)
                        if fixed_entity_type == filter_type:
                            comp = u'%r %s %r' % (fixed_entity_val, op,
                                                  filter_val)
                        elif op != '==':
                            # Different types: inequality filters compare
                            # the type tags instead of the values.
                            comp = '%r %s %r' % (fixed_entity_type, op,
                                                 filter_type)
                        else:
                            continue

                        logging.log(logging.DEBUG - 1,
                                    'Evaling filter expression "%s"', comp)

                        # SECURITY(review): eval() of repr()-built text;
                        # only acceptable while all values are trusted.
                        # NOTE(review): the NotImplementedError comparison
                        # was probably meant to be NotImplemented.
                        try:
                            ret = eval(comp)
                            if ret and ret != NotImplementedError:
                                return True
                        except TypeError:
                            pass

                return False

            # Evaluate eagerly: passes_filter closes over this iteration's
            # loop variables, which are rebound on the next pass.
            results = [candidate for candidate in results
                       if passes_filter(candidate)]
        log.debug('entity list after filter operation: %r' % results)

        # Entities lacking an indexed value for an order property are
        # dropped from the result.
        for order in orders:
            prop = order.property().decode('utf-8')
            results = [
                entity for entity in results if has_prop_indexed(entity, prop)
            ]

        def order_compare_properties(x, y):
            """Return a negative, zero or positive number depending on whether
            property value x is considered smaller than, equal to, or larger
            than property value y. If x and y are different types, they're
            compared based on the type ordering used in the real datastore,
            which is based on the tag numbers in the PropertyValue PB.
            """
            if isinstance(x, datetime.datetime):
                x = datastore_types.DatetimeToTimestamp(x)
            if isinstance(y, datetime.datetime):
                y = datastore_types.DatetimeToTimestamp(y)

            x_type = self._PROPERTY_TYPE_TAGS.get(x.__class__)
            y_type = self._PROPERTY_TYPE_TAGS.get(y.__class__)

            if x_type == y_type:
                try:
                    return cmp(x, y)
                except TypeError:
                    return 0
            else:
                return cmp(x_type, y_type)

        def order_compare_entities(a, b):
            """Return a negative, zero or positive number depending on whether
            entity a is considered smaller than, equal to, or larger than b,
            according to the query's orderings. Ties on every ordering are
            broken by comparing the entity keys.
            """
            for o in orders:
                prop = o.property().decode('utf-8')

                # Enum values are plain ints: compare by value, not identity.
                reverse = (o.direction() ==
                           datastore_pb.Query_Order.DESCENDING)

                # A multi-valued property is represented by the value that
                # sorts first in the requested direction.
                a_val = datastore._GetPropertyValue(a, prop)
                if isinstance(a_val, list):
                    a_val = sorted(a_val,
                                   order_compare_properties,
                                   reverse=reverse)[0]

                b_val = datastore._GetPropertyValue(b, prop)
                if isinstance(b_val, list):
                    b_val = sorted(b_val,
                                   order_compare_properties,
                                   reverse=reverse)[0]

                cmped = order_compare_properties(a_val, b_val)

                if reverse:
                    cmped = -cmped

                if cmped != 0:
                    return cmped

            return cmp(a.key(), b.key())

        results.sort(order_compare_entities)

        cursor = _Cursor(query, results, order_compare_entities)
        self.__queries[cursor.cursor] = cursor

        # Batch size: explicit count first, then the limit, then the default.
        if query.has_count():
            count = query.count()
        elif query.has_limit():
            count = query.limit()
        else:
            count = _BATCH_SIZE

        cursor.PopulateQueryResult(query_result,
                                   count,
                                   query.offset(),
                                   compile=query.compile())

        if query.compile():
            compiled_query = query_result.mutable_compiled_query()
            compiled_query.set_keys_only(query.keys_only())
            compiled_query.mutable_primaryscan().set_index_name(query.Encode())