def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                       column_reversed=False, column_count=100, include_timestamp=False,
                       read_consistency_level=None, buffer_size=None):
    """
    Similar to :meth:`get_range()`, but an
    :class:`~pycassa.cassandra.ttypes.IndexClause` is used
    instead of a key range.

    `index_clause` limits the keys that are returned based on expressions
    that compare the value of a column to a given value.  At least one of
    the expressions in the :class:`.IndexClause` must be on an indexed
    column.

    Note that Cassandra does not support secondary indexes or
    get_indexed_slices() for super column families.

    .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                 :meth:`~pycassa.index.create_index_expression()`
    """
    assert not self.super, "get_indexed_slices() is not " \
                           "supported by super column families"

    cl = read_consistency_level or self.read_consistency_level
    cp = self._column_parent()
    sp = self._slice_predicate(columns, column_start, column_finish,
                               column_reversed, column_count)

    # Pack the values in the index clause expressions
    new_exprs = []
    for expr in index_clause.expressions:
        name = self._pack_name(expr.column_name)
        value = self._pack_value(expr.value, name)
        new_exprs.append(IndexExpression(name, expr.op, value))

    packed_start_key = self._pack_key(index_clause.start_key)
    clause = IndexClause(new_exprs, packed_start_key, index_clause.count)

    # Figure out how we will chunk the request
    if buffer_size is None:
        buffer_size = self.buffer_size
    row_count = clause.count

    count = 0
    i = 0
    last_key = clause.start_key
    while True:
        if row_count is not None:
            if i == 0 and row_count <= buffer_size:
                # FIX: the whole result fits in a single request, so ask for
                # exactly row_count rows.  Previously this branch was missing
                # and the first chunk requested row_count + 1 rows, fetching
                # one unnecessary row from the server.
                buffer_size = row_count
            else:
                # +1 because every chunk after the first leads with a
                # duplicate of the previous chunk's last row, which is
                # skipped below.
                buffer_size = min(row_count - count + 1, buffer_size)
        clause.count = buffer_size
        clause.start_key = last_key
        key_slices = self.pool.execute('get_indexed_slices', cp, clause, sp, cl)

        if key_slices is None:
            return
        for j, key_slice in enumerate(key_slices):
            # Ignore the first element after the first iteration
            # because it will be a duplicate.
            if j == 0 and i != 0:
                continue
            unpacked_key = self._unpack_key(key_slice.key)
            yield (unpacked_key,
                   self._cosc_to_dict(key_slice.columns, include_timestamp))

            count += 1
            if row_count is not None and count >= row_count:
                return

        # A short chunk means the server has exhausted the matching rows.
        if len(key_slices) != buffer_size:
            return
        last_key = key_slices[-1].key
        i += 1
def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                       column_reversed=False, column_count=100, include_timestamp=False,
                       super_column=None, read_consistency_level=None, buffer_size=None):
    """
    Fetches a set of rows from this column family based on an index clause.

    :param index_clause: Restricts the returned keys via expressions that
        compare a column's value to a given value; at least one expression
        in the :class:`.IndexClause` must be on an indexed column.

        .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                     :meth:`~pycassa.index.create_index_expression()`

    :type index_clause: :class:`~pycassa.cassandra.ttypes.IndexClause`
    :param columns: Limit the columns or super columns fetched to this list
    :type columns: list
    :param column_start: Only fetch columns or super columns ``>= column_start``
    :param column_finish: Only fetch columns or super columns ``<= column_finish``
    :param column_reversed: Fetch columns or super columns in reverse order;
        combined with `column_count`, columns are drawn from the end.  The
        returned dictionary may not preserve the reversed order unless an
        ordered ``dict_class`` was passed to the constructor.
    :type column_reversed: bool
    :param column_count: Limit the number of columns or super columns
        fetched per row
    :param include_timestamp: If True, yield a ``(value, timestamp)`` tuple
        for each column
    :type include_timestamp: bool
    :param super_column: Return columns only within this super column
    :param read_consistency_level: Affects the guaranteed replication factor
        before returning from any read operation
    :type read_consistency_level: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`
    :param buffer_size: Size (in rows) of the intermediate buffer used when
        fetching many rows; without buffering, the Cassandra server can
        overallocate memory and fail.  If ``None``, the
        :class:`~pycassa.cassandra.ColumnFamily`'s default `buffer_size`
        is used.
    :type buffer_size: int

    :rtype: Generator that iterates over:
            ``{key : {column_name : column_value}}``
    """
    cp = self._create_column_parent(super_column)
    sp = self._create_slice_predicate(columns, column_start, column_finish,
                                      column_reversed, column_count)

    # Expression names and values must be packed before going to the server.
    packed_exprs = [IndexExpression(self._pack_name(expr.column_name),
                                    expr.op,
                                    self._pack_value(expr.value, expr.column_name))
                    for expr in index_clause.expressions]
    clause = IndexClause(packed_exprs, index_clause.start_key, index_clause.count)

    # Decide how the request will be chunked.
    if buffer_size is None:
        buffer_size = self.buffer_size
    total_rows = clause.count

    rows_yielded = 0
    chunk = 0
    next_start = clause.start_key
    while True:
        if total_rows is not None:
            # +1 because every chunk after the first leads with a duplicate
            # of the previous chunk's last row, which is skipped below.
            buffer_size = min(total_rows - rows_yielded + 1, buffer_size)
        clause.count = buffer_size
        clause.start_key = next_start
        key_slices = self.client.get_indexed_slices(
            cp, clause, sp, self._rcl(read_consistency_level))

        if key_slices is None:
            return
        for idx, key_slice in enumerate(key_slices):
            # Skip the duplicated leading row on every chunk but the first.
            if idx == 0 and chunk != 0:
                continue
            yield (key_slice.key,
                   self._convert_ColumnOrSuperColumns_to_dict_class(
                       key_slice.columns, include_timestamp))

            rows_yielded += 1
            if total_rows is not None and rows_yielded >= total_rows:
                return

        # A short chunk means the server has exhausted the matching rows.
        if len(key_slices) != buffer_size:
            return
        next_start = key_slices[-1].key
        chunk += 1
def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                       column_reversed=False, column_count=100, include_timestamp=False,
                       read_consistency_level=None, buffer_size=None, include_ttl=False):
    """
    Like :meth:`get_range()`, except that rows are selected with an
    :class:`~pycassa.cassandra.ttypes.IndexClause` rather than a key range.

    `index_clause` restricts the returned keys with expressions that compare
    a column's value to a given value.  At least one of the expressions in
    the :class:`.IndexClause` must be on an indexed column.

    Note that Cassandra does not support secondary indexes or
    get_indexed_slices() for super column families.

    .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                 :meth:`~pycassa.index.create_index_expression()`
    """
    assert not self.super, "get_indexed_slices() is not " \
                           "supported by super column families"

    cl = read_consistency_level or self.read_consistency_level
    cp = self._column_parent()
    sp = self._slice_predicate(columns, column_start, column_finish,
                               column_reversed, column_count)

    # Expression names and values must be packed before going to the server.
    # NOTE: the value is packed against the *unpacked* column name here,
    # matching the original evaluation order.
    packed_exprs = []
    for expr in index_clause.expressions:
        packed_value = self._pack_value(expr.value, expr.column_name)
        packed_name = self._pack_name(expr.column_name)
        packed_exprs.append(IndexExpression(packed_name, expr.op, packed_value))

    clause = IndexClause(packed_exprs, self._pack_key(index_clause.start_key),
                         index_clause.count)

    # Decide how the request will be chunked.
    if buffer_size is None:
        buffer_size = self.buffer_size
    total_rows = clause.count

    fetched = 0
    chunk = 0
    next_start = clause.start_key
    while True:
        if total_rows is not None:
            if chunk == 0 and total_rows <= buffer_size:
                # The whole result fits in one request; ask for exactly
                # that many rows.
                buffer_size = total_rows
            else:
                # +1 because every chunk after the first leads with a
                # duplicate of the previous chunk's last row.
                buffer_size = min(total_rows - fetched + 1, buffer_size)
        clause.count = buffer_size
        clause.start_key = next_start
        key_slices = self.pool.execute('get_indexed_slices', cp, clause, sp, cl)

        if key_slices is None:
            return
        for idx, key_slice in enumerate(key_slices):
            # Skip the duplicated leading row on every chunk but the first.
            if idx == 0 and chunk != 0:
                continue
            yield (self._unpack_key(key_slice.key),
                   self._cosc_to_dict(key_slice.columns,
                                      include_timestamp, include_ttl))

            fetched += 1
            if total_rows is not None and fetched >= total_rows:
                return

        # A short chunk means the server has exhausted the matching rows.
        if len(key_slices) != buffer_size:
            return
        next_start = key_slices[-1].key
        chunk += 1
def get_indexed_slices(self, index_clause, columns=None, column_start="", column_finish="",
                       column_reversed=False, column_count=100, include_timestamp=False,
                       read_consistency_level=None, buffer_size=None):
    """
    Like :meth:`get_range()`, except that rows are selected with an
    :class:`~pycassa.cassandra.ttypes.IndexClause` rather than a key range.

    `index_clause` restricts the returned keys with expressions that compare
    a column's value to a given value.  At least one of the expressions in
    the :class:`.IndexClause` must be on an indexed column.

    Note that Cassandra does not support secondary indexes or
    get_indexed_slices() for super column families.

    .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                 :meth:`~pycassa.index.create_index_expression()`
    """
    assert not self.super, "get_indexed_slices() is not " \
                           "supported by super column families"

    cp = self._create_column_parent()
    sp = self._create_slice_predicate(columns, column_start, column_finish,
                                      column_reversed, column_count)

    # Expression names and values must be packed before going to the server;
    # the value is packed against the already-packed column name.
    packed_exprs = []
    for expr in index_clause.expressions:
        packed_name = self._pack_name(expr.column_name)
        packed_exprs.append(
            IndexExpression(packed_name, expr.op,
                            self._pack_value(expr.value, packed_name)))
    clause = IndexClause(packed_exprs, index_clause.start_key, index_clause.count)

    # Decide how the request will be chunked.
    if buffer_size is None:
        buffer_size = self.buffer_size
    total_rows = clause.count

    seen = 0
    chunk = 0
    next_start = clause.start_key
    while True:
        if total_rows is not None:
            # +1 because every chunk after the first leads with a duplicate
            # of the previous chunk's last row, which is skipped below.
            buffer_size = min(total_rows - seen + 1, buffer_size)
        clause.count = buffer_size
        clause.start_key = next_start

        # A thread-local connection is held only for the duration of each
        # chunked request and released even if the call raises.
        try:
            self._obtain_connection()
            key_slices = self._tlocal.client.get_indexed_slices(
                cp, clause, sp, self._rcl(read_consistency_level))
        finally:
            self._release_connection()

        if key_slices is None:
            return
        for idx, key_slice in enumerate(key_slices):
            # Skip the duplicated leading row on every chunk but the first.
            if idx == 0 and chunk != 0:
                continue
            yield (key_slice.key,
                   self._convert_ColumnOrSuperColumns_to_dict_class(
                       key_slice.columns, include_timestamp))

            seen += 1
            if total_rows is not None and seen >= total_rows:
                return

        # A short chunk means the server has exhausted the matching rows.
        if len(key_slices) != buffer_size:
            return
        next_start = key_slices[-1].key
        chunk += 1