Python IndexClause Examples, pycassa.cassandra.ttypes.IndexClause Python Examples

Example #1

0

Show file

File: columnfamilymap.py Project: amorton/cassandra-unicode-bug

    def get_indexed_slices(self, instance=None, *args, **kwargs):
        """
        Fetches a list of instances that satisfy an index clause. Similar
        to :meth:`get_range()`, but uses an index clause instead of a key range.

        The clause is read from the ``index_clause`` keyword argument. The
        value of each of its expressions is packed with the matching entry of
        ``self.columns`` and a new clause is built from the packed
        expressions before the query is handed to the underlying column
        family. Every returned row is turned into an instance of
        ``self.cls`` and stored under its key in a ``self.dict_class``,
        which is returned.

        If `instance` is supplied, its values will be used for each
        :class:`IndexExpression` where the name matches one of the instance's
        attribute names; all other expressions keep their own value. This
        makes packing the values in the :class:`IndexExpression` simpler
        when possible.

        .. deprecated:: 1.0.7
            The instance parameter is deprecated. IndexExpression values should only
            be passed through `index_clause`.

        See :meth:`.ColumnFamily.get_indexed_slices()` for
        an explanation of the parameters.

        """

        assert not self.column_family.super, "get_indexed_slices() is not " \
                "supported by super column families"

        # Unless raw columns were requested, restrict the fetch to the
        # columns this map knows about.
        if 'columns' not in kwargs and not self.raw_columns:
            kwargs['columns'] = self.columns.keys()

        if instance is not None:
            # stacklevel=2 attributes the warning to the caller's line rather
            # than to this module, so the deprecated call site can be found.
            warnings.warn(
                "ColumnFamilyMap.get_indexed_slices()'s 'instance' parameter is deprecated.",
                DeprecationWarning, stacklevel=2)

        old_clause = kwargs['index_clause']

        # Autopack the index clause's values
        new_exprs = []
        for expr in old_clause.expressions:
            # Only take the value from the instance when it actually has an
            # attribute with that name; otherwise keep the expression's own.
            if instance is not None and expr.column_name in instance.__dict__:
                val = instance.__dict__[expr.column_name]
            else:
                val = expr.value
            packed_val = self.columns[expr.column_name].pack(val)
            new_exprs.append(IndexExpression(expr.column_name,
                                             expr.op,
                                             value=packed_val))

        # Build a fresh clause so the caller's clause object is left untouched.
        kwargs['index_clause'] = IndexClause(new_exprs, old_clause.start_key,
                                             old_clause.count)

        keyslice_map = self.column_family.get_indexed_slices(*args, **kwargs)

        ret = self.dict_class()
        for key, columns in keyslice_map:
            combined = self.combine_columns(columns)
            ret[key] = create_instance(self.cls, key=key, **combined)
        return ret

Example #2

0

Show file

def create_index_clause(expr_list, start_key='', count=100):
    """
    Build the :class:`~pycassa.cassandra.ttypes.IndexClause` that is passed to
    :meth:`~pycassa.columnfamily.get_indexed_slices()`

    :param expr_list: [:class:`~pycassa.cassandra.ttypes.IndexExpression`]
        The list of `IndexExpressions` to match
    :param start_key: str
        The key at which the search begins
    :param count: int
        How many results to return

    """
    clause_fields = dict(expressions=expr_list, start_key=start_key, count=count)
    return IndexClause(**clause_fields)

Example #3

0

Show file

def create_index_clause(expr_list, start_key='', count=100):
    """
    Build the :class:`~pycassa.cassandra.ttypes.IndexClause` that is passed to
    :meth:`~pycassa.columnfamily.get_indexed_slices()`

    `expr_list` is a list of
    :class:`~pycassa.cassandra.ttypes.IndexExpression` objects, all of which
    a row must match in order to be returned.  At least one of them has to
    be on an indexed column.

    Only matching rows whose keys come after `start_key` are returned by
    Cassandra; the empty string means every row is considered.  Note that
    this is not as meaningful unless an OrderPreservingPartitioner is used.

    `count` caps the number of rows returned and defaults to 100.

    """
    clause = IndexClause(
        expressions=expr_list,
        start_key=start_key,
        count=count
    )
    return clause

Example #4

0

Show file

File: columnfamilymap.py Project: esatterly/splunk-cassandra

    def get_indexed_slices(self, *args, **kwargs):
        """
        Fetches a list of instances that satisfy an index clause. Similar
        to :meth:`get_range()`, but uses an index clause instead of a key range.

        The index clause may be given either as the ``index_clause`` keyword
        argument or as the first positional argument. The value of each of
        its expressions is packed with the matching entry of
        ``self.columns`` and a new clause is built from the packed
        expressions before the query is handed to the underlying column
        family. Every returned row is turned into an instance of
        ``self.cls`` and stored under its key in a ``self.dict_class``,
        which is returned.

        Raises :exc:`KeyError` if no index clause is supplied at all.

        See :meth:`.ColumnFamily.get_indexed_slices()` for
        an explanation of the parameters.

        """

        assert not self.column_family.super, "get_indexed_slices() is not " \
                "supported by super column families"

        # Accept the clause positionally as well as by keyword; previously a
        # positional clause failed with KeyError('index_clause').
        positional = list(args)
        if 'index_clause' not in kwargs and positional:
            old_clause = positional.pop(0)
        else:
            # Raises KeyError when the clause is missing entirely.
            old_clause = kwargs.pop('index_clause')

        # Unless raw columns were requested, restrict the fetch to the
        # columns this map knows about.  Skip this when further positional
        # arguments remain: assumes the argument right after the clause is
        # `columns` (see ColumnFamily.get_indexed_slices() -- TODO confirm),
        # in which case adding the keyword would pass it twice.
        if 'columns' not in kwargs and not positional and not self.raw_columns:
            kwargs['columns'] = self.columns.keys()

        # Autopack the index clause's values
        new_exprs = []
        for expr in old_clause.expressions:
            packed_val = self.columns[expr.column_name].pack(expr.value)
            new_exprs.append(IndexExpression(expr.column_name,
                                             expr.op,
                                             value=packed_val))

        # Build a fresh clause so the caller's clause object is left untouched.
        new_clause = IndexClause(new_exprs, old_clause.start_key,
                                 old_clause.count)

        if positional:
            # The clause has to precede the remaining positional arguments.
            call_args = [new_clause] + positional
        else:
            # Keyword-only call: forwarded exactly as before.
            kwargs['index_clause'] = new_clause
            call_args = []

        keyslice_map = self.column_family.get_indexed_slices(*call_args, **kwargs)

        ret = self.dict_class()
        for key, columns in keyslice_map:
            combined = self.combine_columns(columns)
            ret[key] = create_instance(self.cls, key=key, **combined)
        return ret

Example #5

0

Show file

File: columnfamily.py Project: samuraisam/pycassa

    def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                           column_reversed=False, column_count=100, include_timestamp=False,
                           read_consistency_level=None, buffer_size=None):
        """
        Similar to :meth:`get_range()`, but an :class:`~pycassa.cassandra.ttypes.IndexClause`
        is used instead of a key range.

        `index_clause` limits the keys that are returned based on expressions
        that compare the value of a column to a given value.  At least one of the
        expressions in the :class:`.IndexClause` must be on an indexed column.
        The clause passed in is not modified; a packed copy is sent instead.

        This is a generator of ``(key, columns)`` tuples.  Rows are requested
        in pages of at most `buffer_size` rows (``self.buffer_size`` when left
        as ``None``) until ``index_clause.count`` rows have been yielded or a
        page comes back shorter than requested.  Every page after the first
        starts at the last key of the previous page, so a page has to hold at
        least two rows to make progress; a smaller `buffer_size` is raised
        to 2.

        Note that Cassandra does not support secondary indexes or get_indexed_slices()
        for super column families.

            .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                         :meth:`~pycassa.index.create_index_expression()`

        """

        assert not self.super, "get_indexed_slices() is not " \
                "supported by super column families"

        cl = read_consistency_level or self.read_consistency_level
        cp = self._column_parent()
        sp = self._slice_predicate(columns, column_start, column_finish,
                                   column_reversed, column_count)

        new_exprs = []
        # Pack the values in the index clause expressions
        for expr in index_clause.expressions:
            name = self._pack_name(expr.column_name)
            # NOTE(review): the packed name is what gets handed to
            # _pack_value() here -- confirm that is the form it expects.
            value = self._pack_value(expr.value, name)
            new_exprs.append(IndexExpression(name, expr.op, value))

        packed_start_key = self._pack_key(index_clause.start_key)
        clause = IndexClause(new_exprs, packed_start_key, index_clause.count)

        # Figure out how we will chunk the request
        if buffer_size is None:
            buffer_size = self.buffer_size
        # A page of fewer than two rows would contain nothing but the
        # duplicated start row on every request after the first, so the loop
        # below could never advance or terminate.
        buffer_size = max(buffer_size, 2)
        row_count = clause.count

        count = 0
        i = 0
        last_key = clause.start_key
        while True:
            if row_count is not None:
                if i == 0 and 0 < row_count <= buffer_size:
                    # We don't need to chunk, grab exactly the number of rows
                    buffer_size = row_count
                else:
                    # One extra row to make up for the duplicated start row
                    buffer_size = min(row_count - count + 1, buffer_size)
            clause.count = buffer_size
            clause.start_key = last_key
            key_slices = self.pool.execute('get_indexed_slices', cp, clause, sp, cl)

            if key_slices is None:
                return
            for j, key_slice in enumerate(key_slices):
                # Ignore the first element after the first iteration
                # because it will be a duplicate.
                if j == 0 and i != 0:
                    continue
                unpacked_key = self._unpack_key(key_slice.key)
                yield (unpacked_key,
                       self._cosc_to_dict(key_slice.columns, include_timestamp))

                count += 1
                if row_count is not None and count >= row_count:
                    return

            # A short page means there is nothing left to fetch
            if len(key_slices) != buffer_size:
                return
            last_key = key_slices[-1].key
            i += 1

Example #6

0

Show file

File: columnfamily.py Project: volkangurel/pycassa

    def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                           column_reversed=False, column_count=100, include_timestamp=False,
                           read_consistency_level=None, buffer_size=None, include_ttl=False):
        """
        Similar to :meth:`get_range()`, but an :class:`~pycassa.cassandra.ttypes.IndexClause`
        is used instead of a key range.

        `index_clause` limits the keys that are returned based on expressions
        that compare the value of a column to a given value.  At least one of the
        expressions in the :class:`.IndexClause` must be on an indexed column.
        The clause passed in is not modified; a packed copy is sent instead.

        This is a generator of ``(key, columns)`` tuples.  Rows are requested
        in pages of at most `buffer_size` rows (``self.buffer_size`` when left
        as ``None``) until ``index_clause.count`` rows have been yielded or a
        page comes back shorter than requested.  Every page after the first
        starts at the last key of the previous page, so a page has to hold at
        least two rows to make progress; a smaller `buffer_size` is raised
        to 2.

        Note that Cassandra does not support secondary indexes or get_indexed_slices()
        for super column families.

            .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                         :meth:`~pycassa.index.create_index_expression()`

        """

        assert not self.super, "get_indexed_slices() is not " \
                "supported by super column families"

        cl = read_consistency_level or self.read_consistency_level
        cp = self._column_parent()
        sp = self._slice_predicate(columns, column_start, column_finish,
                                   column_reversed, column_count)

        new_exprs = []
        # Pack the values in the index clause expressions
        for expr in index_clause.expressions:
            value = self._pack_value(expr.value, expr.column_name)
            name = self._pack_name(expr.column_name)
            new_exprs.append(IndexExpression(name, expr.op, value))

        packed_start_key = self._pack_key(index_clause.start_key)
        clause = IndexClause(new_exprs, packed_start_key, index_clause.count)

        # Figure out how we will chunk the request
        if buffer_size is None:
            buffer_size = self.buffer_size
        # A page of fewer than two rows would contain nothing but the
        # duplicated start row on every request after the first, so the loop
        # below could never advance or terminate.
        buffer_size = max(buffer_size, 2)
        row_count = clause.count

        count = 0
        i = 0
        last_key = clause.start_key
        while True:
            if row_count is not None:
                if i == 0 and row_count <= buffer_size:
                    # We don't need to chunk, grab exactly the number of rows
                    buffer_size = row_count
                else:
                    # One extra row to make up for the duplicated start row
                    buffer_size = min(row_count - count + 1, buffer_size)
            clause.count = buffer_size
            clause.start_key = last_key
            key_slices = self.pool.execute('get_indexed_slices', cp, clause, sp, cl)

            if key_slices is None:
                return
            for j, key_slice in enumerate(key_slices):
                # Ignore the first element after the first iteration
                # because it will be a duplicate.
                if j == 0 and i != 0:
                    continue
                unpacked_key = self._unpack_key(key_slice.key)
                yield (unpacked_key,
                       self._cosc_to_dict(key_slice.columns, include_timestamp, include_ttl))

                count += 1
                if row_count is not None and count >= row_count:
                    return

            # A short page means there is nothing left to fetch
            if len(key_slices) != buffer_size:
                return
            last_key = key_slices[-1].key
            i += 1

Example #7

0

Show file

File: columnfamily.py Project: trhowe/pycassa

    def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                           column_reversed=False, column_count=100, include_timestamp=False,
                           super_column=None, read_consistency_level=None,
                           buffer_size=None):
        """
        Fetches a set of rows from this column family based on an index clause.

        :param index_clause: Limits the keys that are returned based on expressions
          that compare the value of a column to a given value.  At least one of the
          expressions in the :class:`.IndexClause` must be on an indexed column.
          The clause passed in is not modified; a packed copy is sent instead.

            .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                         :meth:`~pycassa.index.create_index_expression()`

        :type index_clause: :class:`~pycassa.cassandra.ttypes.IndexClause`

        :param columns: Limit the columns or super columns fetched to the specified list
        :type columns: list

        :param column_start: Only fetch columns or super columns ``>= column_start``

        :param column_finish: Only fetch columns or super columns ``<= column_finish``

        :param column_reversed:
          Fetch the columns or super_columns in reverse order. If `column_count` is
          used with this, columns will be drawn from the end. The returned dictionary
          of columns may not be in reversed order if an ordered ``dict_class`` is not
          passed to the constructor.
        :type column_reversed: bool

        :param column_count: Limit the number of columns or super columns fetched per row

        :param include_timestamp: If True, return a ``(value, timestamp)`` tuple for each column
        :type include_timestamp: bool

        :param super_column: Return columns only in this super column

        :param read_consistency_level: Affects the guaranteed replication factor before
          returning from any read operation
        :type read_consistency_level: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`

        :param buffer_size: When calling `get_indexed_slices()`, the intermediate
          results need to be buffered if we are fetching many rows, otherwise the Cassandra
          server will overallocate memory and fail.  This is the size of
          that buffer in number of rows. If left as ``None``,
          the :class:`~pycassa.cassandra.ColumnFamily`'s default
          `buffer_size` will be used.  Every page after the first starts at
          the last key of the previous page, so a page has to hold at least
          two rows to make progress; smaller values are raised to 2.
        :type buffer_size: int

        :rtype: Generator that iterates over
                ``(key, {column_name : column_value})`` tuples
        """

        cp = self._create_column_parent(super_column)
        sp = self._create_slice_predicate(columns, column_start, column_finish,
                                    column_reversed, column_count)

        new_exprs = []
        # Pack the values in the index clause expressions
        for expr in index_clause.expressions:
            new_exprs.append(IndexExpression(self._pack_name(expr.column_name),
                                             expr.op,
                                             self._pack_value(expr.value, expr.column_name)))

        clause = IndexClause(new_exprs, index_clause.start_key, index_clause.count)

        # Figure out how we will chunk the request
        if buffer_size is None:
            buffer_size = self.buffer_size
        # A page of fewer than two rows would contain nothing but the
        # duplicated start row on every request after the first, so the loop
        # below could never advance or terminate.
        buffer_size = max(buffer_size, 2)
        row_count = clause.count

        count = 0
        i = 0
        last_key = clause.start_key
        while True:
            if row_count is not None:
                if i == 0 and 0 < row_count <= buffer_size:
                    # We don't need to chunk, grab exactly the number of rows
                    buffer_size = row_count
                else:
                    # One extra row to make up for the duplicated start row
                    buffer_size = min(row_count - count + 1, buffer_size)
            clause.count = buffer_size
            clause.start_key = last_key
            key_slices = self.client.get_indexed_slices(cp, clause, sp,
                                                        self._rcl(read_consistency_level))

            if key_slices is None:
                return
            for j, key_slice in enumerate(key_slices):
                # Ignore the first element after the first iteration
                # because it will be a duplicate.
                if j == 0 and i != 0:
                    continue
                yield (key_slice.key, self._convert_ColumnOrSuperColumns_to_dict_class(
                        key_slice.columns, include_timestamp))

                count += 1
                if row_count is not None and count >= row_count:
                    return

            # A short page means there is nothing left to fetch
            if len(key_slices) != buffer_size:
                return
            last_key = key_slices[-1].key
            i += 1

Example #8

0

Show file

File: columnfamily.py Project: tedcarroll/pycassa

    def get_indexed_slices(self,
                           index_clause,
                           columns=None,
                           column_start="",
                           column_finish="",
                           column_reversed=False,
                           column_count=100,
                           include_timestamp=False,
                           read_consistency_level=None,
                           buffer_size=None):
        """
        Similar to :meth:`get_range()`, but an :class:`~pycassa.cassandra.ttypes.IndexClause`
        is used instead of a key range.

        `index_clause` limits the keys that are returned based on expressions
        that compare the value of a column to a given value.  At least one of the
        expressions in the :class:`.IndexClause` must be on an indexed column.
        The clause passed in is not modified; a packed copy is sent instead.

        This is a generator of ``(key, columns)`` tuples.  Rows are requested
        in pages of at most `buffer_size` rows (``self.buffer_size`` when left
        as ``None``) until ``index_clause.count`` rows have been yielded or a
        page comes back shorter than requested.  Every page after the first
        starts at the last key of the previous page, so a page has to hold at
        least two rows to make progress; a smaller `buffer_size` is raised
        to 2.

        Note that Cassandra does not support secondary indexes or get_indexed_slices()
        for super column families.

            .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                         :meth:`~pycassa.index.create_index_expression()`

        """

        assert not self.super, "get_indexed_slices() is not " \
                "supported by super column families"

        cp = self._create_column_parent()
        sp = self._create_slice_predicate(columns, column_start, column_finish,
                                          column_reversed, column_count)

        new_exprs = []
        # Pack the values in the index clause expressions
        for expr in index_clause.expressions:
            name = self._pack_name(expr.column_name)
            # NOTE(review): the packed name is what gets handed to
            # _pack_value() here -- confirm that is the form it expects.
            value = self._pack_value(expr.value, name)
            new_exprs.append(IndexExpression(name, expr.op, value))

        clause = IndexClause(new_exprs, index_clause.start_key,
                             index_clause.count)

        # Figure out how we will chunk the request
        if buffer_size is None:
            buffer_size = self.buffer_size
        # A page of fewer than two rows would contain nothing but the
        # duplicated start row on every request after the first, so the loop
        # below could never advance or terminate.
        buffer_size = max(buffer_size, 2)
        row_count = clause.count

        count = 0
        i = 0
        last_key = clause.start_key
        while True:
            if row_count is not None:
                if i == 0 and 0 < row_count <= buffer_size:
                    # We don't need to chunk, grab exactly the number of rows
                    buffer_size = row_count
                else:
                    # One extra row to make up for the duplicated start row
                    buffer_size = min(row_count - count + 1, buffer_size)
            clause.count = buffer_size
            clause.start_key = last_key
            # NOTE(review): the connection is obtained inside the try, so
            # _release_connection() also runs when obtaining it failed --
            # confirm that release is safe in that situation.
            try:
                self._obtain_connection()
                key_slices = self._tlocal.client.get_indexed_slices(
                    cp, clause, sp, self._rcl(read_consistency_level))
            finally:
                self._release_connection()

            if key_slices is None:
                return
            for j, key_slice in enumerate(key_slices):
                # Ignore the first element after the first iteration
                # because it will be a duplicate.
                if j == 0 and i != 0:
                    continue
                yield (key_slice.key,
                       self._convert_ColumnOrSuperColumns_to_dict_class(
                           key_slice.columns, include_timestamp))

                count += 1
                if row_count is not None and count >= row_count:
                    return

            # A short page means there is nothing left to fetch
            if len(key_slices) != buffer_size:
                return
            last_key = key_slices[-1].key
            i += 1