def __iter__(self, yield_column_names=False):
    """Iterate over the columns of ``self.rowkeys`` chunk by chunk.

    Columns are fetched through ``self.cls._cf.multiget``, asking for at
    most ``self._chunk_size`` columns per call, until ``self._limit`` has
    been accounted for or a fetch returns nothing.  When more than one row
    comes back, the rows' columns are merged into a single mapping ordered
    by ``self.sort_key`` applied to the column name.

    :param yield_column_names: when true, yield ``(column_name, obj)``
        pairs instead of only the values produced by
        ``self.column_to_obj``.
    """
    fetched = 0
    column_start = self.column_start

    while fetched < self._limit:
        try:
            wanted = min(self._chunk_size, self._limit - fetched)
            if column_start:
                # cassandra includes column_start
                wanted += 1

            rows = self.cls._cf.multiget(
                self.rowkeys,
                column_start=column_start,
                column_finish=self.column_finish,
                column_count=wanted,
                column_reversed=self.column_reversed)

            # multiget returns OrderedDict {rowkey: {column_name: column_value}}
            # combine into single OrderedDict of {column_name: column_value}
            per_row = list(rows.values())
            if not per_row:
                return
            if len(per_row) == 1:
                columns = per_row[0]
            else:
                merged = {}
                for row_columns in per_row:
                    merged.update(row_columns)
                columns = OrderedDict(sorted(
                    merged.items(),
                    key=lambda pair: self.sort_key(pair[0]),
                    reverse=self.column_reversed))
        except NotFoundException:
            return

        # The full chunk size is counted even when fewer columns were asked
        # for; that only happens on the final pass through the loop.
        fetched += self._chunk_size

        if column_start:
            # Drop the column the previous chunk ended on so it is not
            # yielded twice.
            try:
                del columns[column_start]
            except KeyError:
                columns.popitem(last=True)  # remove extra column

        if not columns:
            return

        # Convert to list of single-entry {column_name: column_value} dicts
        names = list(columns)
        l_columns = [{name: columns[name]} for name in names]

        # The next chunk resumes from the last column seen in this one.
        column_start = names[-1]
        objs = self.column_to_obj(l_columns)

        if not yield_column_names:
            ret = objs
        elif len(names) == 1:
            # a single column: pair its name with the column_to_obj result
            ret = ((names[0], objs),)
        else:
            ret = zip(names, objs)

        ret, _is_single = tup(ret, ret_is_single=True)
        for item in ret:
            yield item
def get(self, key, columns=None, column_start=None, column_finish=None,
        column_reversed=False, column_count=100, include_timestamp=False,
        **kwargs):
    """Get a value from the column family stub.

    Looks up the row stored under ``key`` in ``self.rows``, orders its
    columns by name, keeps those accepted by ``self._is_column_in_range``
    and returns at most ``column_count`` of them.

    :param key: row key to look up in ``self.rows``.
    :param columns: forwarded to ``self._is_column_in_range``.
    :param column_start: forwarded to ``self._is_column_in_range``.
    :param column_finish: forwarded to ``self._is_column_in_range``.
    :param column_reversed: when true, columns are returned in descending
        order; also forwarded to ``self._is_column_in_range``.
    :param column_count: maximum number of columns to return.
    :param include_timestamp: when true, return each stored value as-is;
        otherwise return only its first element (stored values are
        presumably ``(value, timestamp)`` pairs -- confirm against the
        code that writes ``self.rows``).
    :param kwargs: accepted and ignored.
    :returns: ``OrderedDict`` mapping column name to value.
    :raises NotFoundException: if the row is missing or has no columns.
    """
    my_columns = self.rows.get(key)
    if not my_columns:
        raise NotFoundException()

    # sorted() works whether items() hands back a list or a dict view,
    # unlike calling .sort() on the result directly.
    items = sorted(my_columns.items())
    if column_reversed:
        items.reverse()

    in_range = [
        (name, stored if include_timestamp else stored[0])
        for name, stored in items
        if self._is_column_in_range(
            name, columns, column_start, column_finish, column_reversed)
    ]
    return OrderedDict(in_range[:column_count])
def __init__(self, pool=None, column_family=None, rows=None, **kwargs):
    """Set up the stub's row storage and optionally register it with a pool.

    :param pool: when not ``None``, the stub registers itself by calling
        ``pool._register_mock_cf(column_family, self)``.
    :param column_family: name passed to ``pool._register_mock_cf``.
    :param rows: mapping of row key to row.  A falsy value is replaced by
        a new empty ``OrderedDict``.  Rows that are not ``DictWithTime``
        instances are wrapped in ``DictWithTime`` in place, so the mapping
        passed in is modified.
    :param kwargs: accepted and ignored.
    """
    rows = rows or OrderedDict()

    # Store each converted row back under its key: rebinding the loop
    # variable alone would leave ``rows`` holding the unconverted row.
    # Iterating over a snapshot keeps the loop independent of the writes.
    for row_key, row in list(rows.items()):
        if not isinstance(row, DictWithTime):
            rows[row_key] = DictWithTime(row)

    self.rows = rows

    if pool is not None:
        pool._register_mock_cf(column_family, self)
def print_map(level, dict): for key in dict.keys(): value = dict[key] if type(value) == type(OrderedDict()): print indent(level), key, ": {" print_map(level + 1, value) print indent(level), "}" elif key == "sig" or key == "psig" or key == "_csh_": # these don't render well even though we do decode # unicode to utf8, so converting to hex print indent(level), key, ":", quote(to_hex_string(value)), "," else: print indent(level), key, ":", quote(value), ","
def get(self, key, columns=None, column_start=None, column_finish=None,
        column_reversed=False, column_count=100, include_timestamp=False,
        **kwargs):
    """Get a value from the column family stub.

    Looks up the row stored under ``key`` in ``self.rows``, orders its
    columns, keeps those accepted by ``self._is_column_in_range`` and
    returns at most ``column_count`` of them.

    Ordering is chosen from the first column name of the row:

    * a version-1 ``UUID`` name: columns are ordered by ``UUID.time``;
    * a tuple name containing any ``UUID``: columns are ordered
      component-wise, using ``UUID.time`` for the positions that hold a
      version-1 UUID in that first name;
    * anything else: the default ordering of the ``(name, value)`` items.

    :param key: row key to look up in ``self.rows``.
    :param columns: forwarded to ``self._is_column_in_range``.
    :param column_start: forwarded to ``self._is_column_in_range``.
    :param column_finish: forwarded to ``self._is_column_in_range``.
    :param column_reversed: when true, the ordering is reversed; also
        forwarded to ``self._is_column_in_range``.
    :param column_count: maximum number of columns to return.
    :param include_timestamp: when true, return each stored value as-is;
        otherwise return only its first element (stored values are
        presumably ``(value, timestamp)`` pairs -- confirm against the
        code that writes ``self.rows``).
    :param kwargs: accepted and ignored.
    :returns: ``OrderedDict`` mapping column name to value.
    :raises NotFoundException: if the row is missing or has no columns.
    """
    my_columns = self.rows.get(key)
    if not my_columns:
        raise NotFoundException()

    items = list(my_columns.items())

    # Each item is a (column_name, stored_value) pair, so the type checks
    # have to look at the name (items[0][0]) rather than at the pair.
    first_name = items[0][0]
    if isinstance(first_name, UUID) and first_name.version == 1:
        items.sort(key=lambda item: item[0].time)
    elif isinstance(first_name, tuple) and any(
            isinstance(part, UUID) for part in first_name):
        are_components_uuids = [
            isinstance(part, UUID) and part.version == 1
            for part in first_name
        ]

        def composite_sort_key(item):
            # Compare version-1 UUID components by their timestamp and
            # every other component by its own value.
            return [
                part.time if is_uuid else part
                for part, is_uuid in zip(item[0], are_components_uuids)
            ]

        items.sort(key=composite_sort_key)
    else:
        items.sort()

    if column_reversed:
        items.reverse()

    in_range = [
        (name, stored if include_timestamp else stored[0])
        for name, stored in items
        if self._is_column_in_range(
            name, columns, column_start, column_finish, column_reversed)
    ]
    return OrderedDict(in_range[:column_count])
def test_multiget_batching(self):
    """multiget returns the same rows whatever ``buffer_size`` is used.

    Ten single-column rows are inserted, then fetched with buffer sizes
    below, equal to and above the number of keys.
    """
    key_prefix = "TestColumnFamily.test_multiget_batching"
    keys = [key_prefix + str(i) for i in range(10)]

    expected = OrderedDict()
    for key in keys:
        expected[key] = {'col': 'val'}
        cf.insert(key, {'col': 'val'})

    for buffer_size in (1, 2, 3, 9, 10, 11, 100):
        assert_equal(cf.multiget(keys, buffer_size=buffer_size), expected)
def multiget(self, keys, columns=None, column_start=None, column_finish=None,
             column_reversed=False, column_count=100, include_timestamp=False,
             **kwargs):
    """Get multiple key values from the column family stub.

    Calls ``self.get`` once for every key in ``keys`` that is present in
    ``self.rows``, passing the column options through unchanged.  Keys
    that are not in ``self.rows`` are skipped.  Extra ``kwargs`` are
    accepted and ignored.

    :returns: ``OrderedDict`` mapping each found key to the result of
        ``self.get``, in the order the keys were given.
    """
    options = dict(
        columns=columns,
        column_start=column_start,
        column_finish=column_finish,
        column_reversed=column_reversed,
        column_count=column_count,
        include_timestamp=include_timestamp,
    )

    found = OrderedDict()
    for key in keys:
        if key not in self.rows:
            continue
        found[key] = self.get(key, **options)
    return found
def serialize_columns(self, columns):
    """Serialize every non-``None`` column value.

    :param columns: mapping of column name to value.
    :returns: ``OrderedDict`` of column name to
        ``self.serialize_value(value)``, in the iteration order of
        ``columns``; entries whose value is ``None`` are left out.
    """
    serialized = OrderedDict()
    for name, value in columns.items():
        if value is None:
            continue
        serialized[name] = self.serialize_value(value)
    return serialized
def __iter__(self, yield_column_names=False):
    """Iterate over the columns of ``self.rowkeys`` chunk by chunk.

    Columns are fetched through ``self.cls._cf.multiget``, asking for at
    most ``self._chunk_size`` columns per call, until ``self._limit`` has
    been accounted for or a fetch returns nothing.  When more than one row
    comes back, the rows' columns are merged into a single mapping ordered
    by ``self.sort_key`` applied to the column name.

    :param yield_column_names: when true, yield ``(column_name, obj)``
        pairs instead of only the values produced by
        ``self.column_to_obj``.
    """
    fetched = 0
    column_start = self.column_start

    while fetched < self._limit:
        try:
            wanted = min(self._chunk_size, self._limit - fetched)
            if column_start:
                # cassandra includes column_start
                wanted += 1

            rows = self.cls._cf.multiget(
                self.rowkeys,
                column_start=column_start,
                column_finish=self.column_finish,
                column_count=wanted,
                column_reversed=self.column_reversed)

            # multiget returns OrderedDict {rowkey: {column_name: column_value}}
            # combine into single OrderedDict of {column_name: column_value}
            per_row = list(rows.values())
            if not per_row:
                return
            if len(per_row) == 1:
                columns = per_row[0]
            else:
                merged = {}
                for row_columns in per_row:
                    merged.update(row_columns)
                columns = OrderedDict(sorted(
                    merged.items(),
                    key=lambda pair: self.sort_key(pair[0]),
                    reverse=self.column_reversed))
        except NotFoundException:
            return

        # The full chunk size is counted even when fewer columns were asked
        # for; that only happens on the final pass through the loop.
        fetched += self._chunk_size

        if column_start:
            try:
                del columns[column_start]
            except KeyError:
                # This can happen when a timezone-aware datetime is
                # passed in as a column_start, but non-timezone-aware
                # datetimes are returned from cassandra, causing `del` to
                # fail.
                #
                # Reversed queries include column_start in the results,
                # while non-reversed queries do not.
                if self.column_reversed:
                    columns.popitem(last=False)

        if not columns:
            return

        # Convert to list of single-entry {column_name: column_value} dicts
        names = list(columns)
        l_columns = [{name: columns[name]} for name in names]

        # The next chunk resumes from the last column seen in this one.
        column_start = names[-1]
        objs = self.column_to_obj(l_columns)

        if not yield_column_names:
            ret = objs
        elif len(names) == 1:
            # a single column: pair its name with the column_to_obj result
            ret = ((names[0], objs),)
        else:
            ret = zip(names, objs)

        ret, _is_single = tup(ret, ret_is_single=True)
        for item in ret:
            yield item