class VoteDetailsByDay(tdb_cassandra.View):
    """Cassandra view bucketing vote details into one row per calendar day.

    Row key is a YYYY-MM-DD string; each column is keyed by the
    (voter id36, votee id36) pair and stores a small JSON payload.
    """

    _use_db = False
    _fetch_all_columns = True
    _write_consistency_level = tdb_cassandra.CL.ONE
    _compare_with = CompositeType(AsciiType(), AsciiType())
    _extra_schema_creation_args = {
        "key_validation_class": ASCII_TYPE,
        "default_validation_class": UTF8_TYPE,
    }

    @classmethod
    def _rowkey(cls, date):
        """Render a date as the YYYY-MM-DD row key."""
        return date.strftime("%Y-%m-%d")

    @classmethod
    def create(cls, thing1, thing2s, pgvote, vote_info):
        """Record one vote's details under the day it was cast."""
        assert len(thing2s) == 1

        day = pgvote._date.astimezone(VOTE_TIMEZONE).date()
        column_key = (pgvote._thing1._id36, pgvote._thing2._id36)
        payload = json.dumps({
            "direction": pgvote._name,
            "date": epoch_seconds(pgvote._date),
        })
        cls._set_values(cls._rowkey(day), {column_key: payload})

    @classmethod
    def count_votes(cls, date):
        """Count the vote columns stored for the given date."""
        total = 0
        for _ in cls._cf.xget(cls._rowkey(date)):
            total += 1
        return total
class VoteDetailsByDay(tdb_cassandra.View):
    """Cassandra view bucketing vote details into one row per day.

    Row key is a YYYY-MM-DD string in ``TIMEZONE``; each column is keyed
    by the (voter id36, thing id36) pair and stores a JSON payload.
    """

    _use_db = False
    _fetch_all_columns = True
    _write_consistency_level = tdb_cassandra.CL.ONE
    _compare_with = CompositeType(AsciiType(), AsciiType())
    _extra_schema_creation_args = {
        "key_validation_class": ASCII_TYPE,
        "default_validation_class": UTF8_TYPE,
    }

    # all day-bucketing happens in this fixed timezone
    TIMEZONE = pytz.timezone("America/Los_Angeles")

    @classmethod
    def _rowkey(cls, vote):
        """Row key for the day this vote was cast."""
        return cls._rowkey_by_datetime(vote.date)

    @classmethod
    def _rowkey_by_datetime(cls, date):
        """Format an aware datetime as a YYYY-MM-DD key in TIMEZONE."""
        return date.astimezone(cls.TIMEZONE).strftime("%Y-%m-%d")

    @classmethod
    def create(cls, user, thing, vote):
        """Store one vote's direction and timestamp under its day's row."""
        # we don't use the user or thing args, but they need to be there for
        # calling this automatically when updating views of a DenormalizedRel
        colname = (vote.user._id36, vote.thing._id36)
        data = json.dumps({
            "direction": Vote.serialize_direction(vote.direction),
            "date": int(epoch_timestamp(vote.date)),
        })
        cls._set_values(cls._rowkey(vote), {colname: data})

    @classmethod
    def count_votes(cls, date):
        """Return the number of votes made on a particular date (a plain date)."""
        from datetime import datetime  # file-level import not visible in this chunk

        # BUG FIX: previously this called cls._rowkey(date), but _rowkey()
        # expects a vote object (it reads vote.date and calls .astimezone()),
        # so passing a plain date raised AttributeError.  Build an aware
        # datetime in this view's timezone and key off that instead.
        # NOTE(review): tzinfo=<pytz zone> in the datetime constructor uses
        # the zone's base offset rather than localize(); this matches the
        # pattern used elsewhere in this file — confirm DST handling.
        when = datetime(date.year, date.month, date.day, tzinfo=cls.TIMEZONE)

        # stream and count the columns instead of get_count(), which tends
        # to hit RPC timeouts on rows this wide
        return sum(1 for x in cls._cf.xget(cls._rowkey_by_datetime(when)))
class Canvas(tdb_cassandra.View):
    """
    Super naive storage for the canvas, everything's in a single row.

    In the future we may want to break it up so that each C* row contains
    only a subset of all rows. That would spread the data out in the ring
    and would make it easy to grab regions of the canvas.
    """
    # FIX: the text above was previously a bare string sitting *after* the
    # class attributes, where it is a no-op expression and never becomes
    # Canvas.__doc__; moved to first-statement position so it is the real
    # class docstring.

    _use_db = True
    _connection_pool = 'main'
    _compare_with = CompositeType(IntegerType(), IntegerType())

    @classmethod
    def _rowkey(cls):
        """All pixels live in the single row keyed by CANVAS_ID."""
        return CANVAS_ID

    @classmethod
    def insert_pixel(cls, pixel):
        """Write one pixel as an (x, y) -> JSON column."""
        columns = {
            (pixel.x, pixel.y): json.dumps({
                "color": pixel.color,
                "timestamp": convert_uuid_to_time(pixel._id),
                "user_name": pixel.user_name,
                "user_fullname": pixel.user_fullname,
            })
        }
        cls._cf.insert(cls._rowkey(), columns)

    @classmethod
    def get(cls, x, y):
        """Return the pixel dict stored at (x, y), or {} if unset."""
        column = (x, y)
        try:
            row = cls._cf.get(cls._rowkey(), columns=[column])
        except tdb_cassandra.NotFoundException:
            return {}

        d = row.get(column, '{}')
        pixel_dict = json.loads(d)
        return pixel_dict

    @classmethod
    def get_all(cls):
        """Return dict of (x,y) -> pixel dict for the whole canvas."""
        # NOTE(review): if xget returns a lazy generator, NotFoundException
        # would surface during iteration (inside the comprehension below),
        # not at call time, making this except unreachable — confirm
        # against tdb_cassandra before relying on it.
        try:
            gen = cls._cf.xget(cls._rowkey())
        except tdb_cassandra.NotFoundException:
            return {}

        return {
            (x, y): json.loads(d)
            for (x, y), d in gen
        }
class VoteDetailsByDay(tdb_cassandra.View):
    """One Cassandra row per (Pacific-time) day holding vote details.

    Columns are keyed by the (voter id36, thing id36) pair and hold a
    small JSON payload with the vote's direction and timestamp.
    """

    _use_db = False
    _fetch_all_columns = True
    _write_consistency_level = tdb_cassandra.CL.ONE
    _compare_with = CompositeType(AsciiType(), AsciiType())
    _extra_schema_creation_args = {
        "key_validation_class": ASCII_TYPE,
        "default_validation_class": UTF8_TYPE,
    }

    TIMEZONE = pytz.timezone("America/Los_Angeles")

    @classmethod
    def _rowkey(cls, vote):
        """Row key for the day on which this vote was cast."""
        return cls._rowkey_by_datetime(vote.date)

    @classmethod
    def _rowkey_by_datetime(cls, date):
        """Format an aware datetime as a YYYY-MM-DD key in TIMEZONE."""
        return date.astimezone(cls.TIMEZONE).strftime("%Y-%m-%d")

    @classmethod
    def create(cls, user, thing, vote):
        """Store one vote's direction and timestamp under its day's row.

        The user and thing args are unused here, but the signature must
        keep them so this can be called automatically when updating views
        of a DenormalizedRel.
        """
        column = {
            (vote.user._id36, vote.thing._id36): json.dumps({
                "direction": Vote.serialize_direction(vote.direction),
                "date": int(epoch_timestamp(vote.date)),
            }),
        }
        cls._set_values(cls._rowkey(vote), column)

    @classmethod
    def count_votes(cls, date):
        """Return the number of votes made on a particular date."""
        # interpret the plain date as a datetime in this view's timezone
        day = datetime(date.year, date.month, date.day, tzinfo=cls.TIMEZONE)

        # manually count the columns by streaming instead of get_count(),
        # because the large number of columns tends to cause RPC timeouts
        total = 0
        for _ in cls._cf.xget(cls._rowkey_by_datetime(day)):
            total += 1
        return total
class PromotedLinkRoadblock(tdb_cassandra.View):
    """Per-subreddit roadblock date ranges.

    Each subreddit's row (keyed by its id36) holds one empty column per
    roadblock, keyed by the (start, end) datetime pair.
    """

    _use_db = True
    _connection_pool = 'main'
    _read_consistency_level = tdb_cassandra.CL.ONE
    _write_consistency_level = tdb_cassandra.CL.QUORUM
    _compare_with = CompositeType(
        tdb_cassandra.DateType(),
        tdb_cassandra.DateType(),
    )

    @classmethod
    def _column(cls, start, end):
        """Build the single (start, end) -> '' column for a roadblock."""
        return {(to_datetime(start), to_datetime(end)): ''}

    @classmethod
    def _dates_from_key(cls, key):
        """Convert a (start, end) column key back into plain dates."""
        return to_date(key[0]), to_date(key[1])

    @classmethod
    def add(cls, sr, start, end):
        """Roadblock sr between start and end, expiring a week after end."""
        today = datetime.now(g.tz).date()
        days_to_live = (to_date(end) - today).days + 7
        cls._set_values(
            sr._id36,
            cls._column(start, end),
            ttl=timedelta(days=days_to_live).total_seconds(),
        )

    @classmethod
    def remove(cls, sr, start, end):
        """Delete the roadblock column for sr over [start, end)."""
        cls._remove(sr._id36, cls._column(start, end))

    @classmethod
    def is_roadblocked(cls, sr, start, end):
        """Return the conflicting (start, end) dates if sr is blocked in
        the requested window, otherwise False."""
        start, end = to_date(start), to_date(end)

        # retrieve columns for roadblocks starting before end
        try:
            columns = cls._cf.get(sr._id36,
                                  column_finish=(to_datetime(end),),
                                  column_count=tdb_cassandra.max_column_count)
        except tdb_cassandra.NotFoundException:
            return False

        for key in columns.iterkeys():
            rb_start, rb_end = cls._dates_from_key(key)
            # check for overlap, end dates not inclusive
            if start < rb_end and rb_start < end:
                return (rb_start, rb_end)
        return False

    @classmethod
    def get_roadblocks(cls):
        """List every roadblock as (subreddit name, start, end) tuples."""
        rows = list(cls._cf.get_range())
        srs = Subreddit._byID36([id36 for id36, columns in rows], data=True)

        results = []
        for id36, columns in rows:
            sr_name = srs[id36].name
            for key in columns.iterkeys():
                rb_start, rb_end = cls._dates_from_key(key)
                results.append((sr_name, rb_start, rb_end))
        return results
def create_cfs(self):
    """Create the Cassandra column families used by the app, if missing.

    Opens a SystemManager and a ConnectionPool, probes each column family
    by constructing a ColumnFamily client, and creates the CF (plus any
    secondary indexes) when the probe fails.  Both resources are released
    in the finally block.
    """
    sys_mgr = None
    pool = None
    try:
        sys_mgr = SystemManager()
        pool = ConnectionPool(settings.KEYSPACE,
                              server_list=settings.CASSANDRA_HOSTS)

        try:
            cf = ColumnFamily(pool, CF_LOGS)
        except Exception:
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.  pycassa signals a missing CF
            # with an exception here, so treat any Exception as "create it".
            logger.info("create_cfs(): Creating column family %s", CF_LOGS)
            #========================================
            # Column key -> CompositeType
            #========================================
            # 1. UUID + Timestamp
            # 2. Host / Origin
            # 3. Application
            # 4. Severity
            comparator = CompositeType(TimeUUIDType(),
                                       UTF8Type(),
                                       UTF8Type(),
                                       UTF8Type())
            sys_mgr.create_column_family(settings.KEYSPACE, CF_LOGS,
                                         comparator_type=comparator)
            cf = ColumnFamily(pool, CF_LOGS)
            # cf.get_count(str(uuid.uuid4()))

        try:
            cf = ColumnFamily(pool, CF_METADATA)
        except Exception:
            logger.info("create_cfs(): Creating column family %s",
                        CF_METADATA)
            sys_mgr.create_column_family(settings.KEYSPACE, CF_METADATA,
                                         comparator_type=UTF8Type())
            cf = ColumnFamily(pool, CF_METADATA)
            # smoke-test the freshly created CF
            cf.get_count(str(uuid.uuid4()))

        try:
            cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)
        except Exception:
            logger.info("create_cfs(): Creating column family %s",
                        CF_TIMESTAMP_BITMAP)
            sys_mgr.create_column_family(settings.KEYSPACE,
                                         CF_TIMESTAMP_BITMAP,
                                         comparator_type=IntegerType())
            cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)

        try:
            cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)
        except Exception:
            logger.info("create_cfs(): Creating column family %s",
                        CF_MULTI_MESSAGELOGS)
            sys_mgr.create_column_family(settings.KEYSPACE,
                                         CF_MULTI_MESSAGELOGS,
                                         comparator_type=UTF8Type())
            cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)
            # secondary indexes for querying multi-message logs by
            # host, application and finish status
            sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                                 'meta:host', UTF8_TYPE,
                                 index_name='multimsg_host_index')
            sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                                 'meta:application', UTF8_TYPE,
                                 index_name='multimsg_application_index')
            sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                                 'meta:status', UTF8_TYPE,
                                 index_name='multimsg_finish_status_index')
    finally:
        if pool:
            pool.dispose()
        if sys_mgr:
            sys_mgr.close()
def test(host='localhost', keyspace_provided=None): from pycassa.types import BytesType, CompositeType from pycassa.system_manager import SystemManager, SIMPLE_STRATEGY # Create a fake keyspace, if not provided if not keyspace_provided: import random keyspace = '%s_%s' % ('json', random.randrange(1000000, 100000000)) else: keyspace = keyspace_provided # Connect to cluster and create keyspace system_manager = SystemManager(host) system_manager.create_keyspace(keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'}) try: # Create CF with many CompositeFields comparator = CompositeType(BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType(), BytesType()) system_manager.create_column_family(keyspace, 'json', comparator_type=comparator) # Connect to the KS/CF and save samples db = CassandraJsonMapper.db(keyspace, 'json') tests = 0 # ---------------------------------------------------------------------- # Test a complicated structure sample_1 = { 'key1': { 'a': 1, 2: 'b', 'c': { 'd': 3, 'e': { 'f': True }, 'g': [ ['h', 'i', 'j', 4, 5], ['k', 'l', 'm', 'n', 'o'], ], 'p': [{ 'id': 6, 'q': 'r' }, { 'id': 7, 's': 't' }], 'u': [], 'v': None } } } db.save(sample_1) if db.get('key1') != sample_1['key1']: raise AssertionError( 'What was saved is not being equally returned.') tests += 1 # ---------------------------------------------------------------------- # Test improper format missing values sample_2 = {'key1': 1} try: db.save(sample_2) raise AssertionError('sample_2 should have thrown a KeyError.') except KeyError: pass tests += 1 # ---------------------------------------------------------------------- # Test saving multiple keys sample_3 = {'key2': {2: 2}, 
'key3': {3: 3}, 'key4': {4: 4}} db.save(sample_3) if db.get('key2') != {2:2} or db.get('key3') != {3:3} or \ db.get('key4') != {4:4}: raise AssertionError('Not all keys in a json_payload were saved.') tests += 1 # ---------------------------------------------------------------------- # Test delete db.delete('key1') if db.get('key1'): raise AssertionError('Not all keys in were delted.') tests += 1 # ---------------------------------------------------------------------- # Test deletes db.delete(['key2', 'key3', 'key4']) if db.get('key2') or db.get('key3') or db.get('key4'): raise AssertionError('Not all keys in were delted.') tests += 1 # ---------------------------------------------------------------------- # Test reading from fake keys if db.get('fake_key') != {}: raise AssertionError('A get(fake_key) should return {}.') tests += 1 # ---------------------------------------------------------------------- # Test multi-threaded save db.mt_save(sample_1) db.mt_finish() if db.get('key1') != sample_1['key1']: raise AssertionError( 'What was saved is not being equally returned.') tests += 1 # ---------------------------------------------------------------------- # print json_format(db.get('key1')) print '{0}/{0} tests passed!'.format(tests) except: raise finally: # For debugging purposes # raw_input('Press ENTER to continue...') if not keyspace_provided: # Delete the temporary KS's system_manager.drop_keyspace(keyspace) system_manager.close()