def create_batch_tables(cluster, session):
    """ Set up the tables that large batch operations rely on.

    Before each CREATE statement an existing table with an outdated layout is
    dropped: 'batches' when its 'transaction' column is not a bigint, and
    'batch_status' when it has no 'txid_hash' column.

    Args:
      cluster: A cassandra-driver cluster.
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut: A CREATE statement timed out. The error is
        re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    keyspace = cluster.metadata.keyspaces[KEYSPACE]

    # Drop a 'batches' table that still uses a non-bigint transaction column.
    if 'batches' in keyspace.tables:
        batch_columns = keyspace.tables['batches'].columns
        outdated_txid = ('transaction' in batch_columns and
                         batch_columns['transaction'].cql_type != 'bigint')
        if outdated_txid:
            session.execute('DROP TABLE batches',
                            timeout=SCHEMA_CHANGE_TIMEOUT)

    logger.info('Trying to create batches')
    batches_cql = (
        ' CREATE TABLE IF NOT EXISTS batches ( '
        'app text, '
        'transaction bigint, '
        'namespace text, '
        'path blob, '
        'old_value blob, '
        'new_value blob, '
        'exclude_indices text, '
        'PRIMARY KEY ((app, transaction), namespace, path) '
        ') '
    )
    create_batches = SimpleStatement(batches_cql, retry_policy=NO_RETRIES)
    try:
        session.execute(create_batches, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
        settle_notice = (
            'Encountered an operation timeout while creating batches table. '
            'Waiting {} seconds for schema to settle.'
        ).format(SCHEMA_CHANGE_TIMEOUT)
        logger.warning(settle_notice)
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    # Drop a 'batch_status' table that predates the txid_hash column.
    outdated_status = (
        'batch_status' in keyspace.tables and
        'txid_hash' not in keyspace.tables['batch_status'].columns)
    if outdated_status:
        session.execute('DROP TABLE batch_status',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

    logger.info('Trying to create batch_status')
    status_cql = (
        ' CREATE TABLE IF NOT EXISTS batch_status ( '
        'txid_hash blob PRIMARY KEY, '
        'applied boolean, '
        'op_id uuid '
        ') '
    )
    create_status = SimpleStatement(status_cql, retry_policy=NO_RETRIES)
    try:
        session.execute(create_status, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
        settle_notice = (
            'Encountered an operation timeout while creating batch_status table. '
            'Waiting {} seconds for schema to settle.'
        ).format(SCHEMA_CHANGE_TIMEOUT)
        logger.warning(settle_notice)
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise
def rebuild_task_indexes(session):
    """ Creates index entries for all pull queue tasks.

    Tasks are read from pull_queue_tasks in pages of 100 rows. For every task
    one row is written to pull_queue_eta_index and one to
    pull_queue_tags_index (a missing tag is stored as an empty string).

    Args:
      session: A cassandra-driver session.
    """
    logger.info('Rebuilding task indexes')
    batch_size = 100
    total_tasks = 0

    # The first page must not be restricted by token: token('', '', '') is just
    # another position on the ring, so starting from it would silently skip
    # every task whose token sorts before it.
    select_first_page = (
        ' SELECT app, queue, id, lease_expires, tag '
        'FROM pull_queue_tasks '
        'LIMIT {} '
    ).format(batch_size)
    select_next_page = (
        ' SELECT app, queue, id, lease_expires, tag '
        'FROM pull_queue_tasks '
        'WHERE token(app, queue, id) > token(%(app)s, %(queue)s, %(id)s) '
        'LIMIT {} '
    ).format(batch_size)

    # The insert statements do not change between tasks, so build them once.
    insert_eta_index = SimpleStatement(
        ' INSERT INTO pull_queue_eta_index (app, queue, eta, id, tag) '
        'VALUES (%(app)s, %(queue)s, %(eta)s, %(id)s, %(tag)s) ',
        retry_policy=BASIC_RETRIES)
    insert_tag_index = SimpleStatement(
        ' INSERT INTO pull_queue_tags_index (app, queue, tag, eta, id) '
        'VALUES (%(app)s, %(queue)s, %(tag)s, %(eta)s, %(id)s) ',
        retry_policy=BASIC_RETRIES)

    last_task = None
    while True:
        if last_task is None:
            results = session.execute(select_first_page)
        else:
            # Resume right after the last task seen on the previous page.
            results = session.execute(select_next_page, {
                'app': last_task.app,
                'queue': last_task.queue,
                'id': last_task.id,
            })

        results_list = list(results)
        for result in results_list:
            parameters = {
                'app': result.app,
                'queue': result.queue,
                'eta': result.lease_expires,
                'id': result.id,
                'tag': result.tag or '',
            }
            session.execute(insert_eta_index, parameters)
            session.execute(insert_tag_index, parameters)

        total_tasks += len(results_list)

        # A short page means the table has been exhausted.
        if len(results_list) < batch_size:
            break

        last_task = results_list[-1]

    logger.info('Created entries for {} tasks'.format(total_tasks))
def create_batch_tables(cluster, session):
    """ Create the 'batches' and 'batch_status' tables if they are missing.

    Args:
      cluster: A cassandra-driver cluster (not used by this function).
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut: A CREATE statement timed out. The error is
        re-raised after a 60 second sleep.
    """
    batches_cql = (
        ' CREATE TABLE IF NOT EXISTS batches ( '
        'app text, '
        'transaction int, '
        'namespace text, '
        'path blob, '
        'old_value blob, '
        'new_value blob, '
        'exclude_indices text, '
        'PRIMARY KEY ((app, transaction), namespace, path) '
        ') '
    )
    batch_status_cql = (
        ' CREATE TABLE IF NOT EXISTS batch_status ( '
        'app text, '
        'transaction int, '
        'applied boolean, '
        'PRIMARY KEY ((app), transaction) '
        ') '
    )

    # Each step: (progress message, CQL to run, warning logged on timeout).
    steps = (
        ('Trying to create batches',
         batches_cql,
         'Encountered an operation timeout while creating batches table. '
         'Waiting 1 minute for schema to settle.'),
        ('Trying to create batch_status',
         batch_status_cql,
         'Encountered an operation timeout while creating batch_status table. '
         'Waiting 1 minute for schema to settle.'),
    )

    for announcement, cql, timeout_notice in steps:
        logging.info(announcement)
        statement = SimpleStatement(cql, retry_policy=NO_RETRIES)
        try:
            session.execute(statement)
        except cassandra.OperationTimedOut:
            logging.warning(timeout_notice)
            time.sleep(60)
            raise
def get_entity(self, table_name, row_key, column_names):
    """ Fetch the values of the given columns for a single row.

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a '/' before the lookup.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list whose first element is a status marker. When columns are found,
      their values follow in the order of column_names. When nothing comes
      back, 'Not found' is appended to the marker and no values follow.
    Raises:
      AppScaleDBConnectionError: A transient Cassandra error prevented the read.
      KeyError: Some columns were found, but not every requested one.
    """
    response = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))

    cql = (
        ' SELECT * FROM "{table}" '
        'WHERE {key} = %(key)s '
        'AND {column} IN %(columns)s '
    ).format(table=table_name,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME)
    query = SimpleStatement(cql, retry_policy=self.retry_policy)
    parameters = {'key': full_key, 'columns': ValueSequence(column_names)}

    try:
        rows = self.session.execute(query, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        raise AppScaleDBConnectionError('Unable to fetch entity')

    # Each result row is a (key, column, value) triple; index values by column.
    values_by_column = {}
    for (_, column, value) in rows:
        values_by_column[column] = value

    if not values_by_column:
        response[0] += 'Not found'
        return response

    response.extend([values_by_column[name] for name in column_names])
    return response
def create_transactions_table(session):
    """ Create the table that holds transaction metadata, if it is missing.

    Args:
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut: The CREATE statement timed out. The error is
        re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    transactions_cql = (
        ' CREATE TABLE IF NOT EXISTS transactions ( '
        'txid_hash blob, '
        'operation tinyint, '
        'namespace text, '
        'path blob, '
        'start_time timestamp, '
        'is_xg boolean, '
        'in_progress blob, '
        'entity blob, '
        'task blob, '
        'PRIMARY KEY (txid_hash, operation, namespace, path) '
        ') WITH gc_grace_seconds = 120 '
    )
    create_transactions = SimpleStatement(transactions_cql,
                                          retry_policy=NO_RETRIES)
    try:
        session.execute(create_transactions, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
        settle_notice = (
            'Encountered an operation timeout while creating transactions table. '
            'Waiting {} seconds for schema to settle.'
        ).format(SCHEMA_CHANGE_TIMEOUT)
        logger.warning(settle_notice)
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise
def get_entity(self, table_name, row_key, column_names):
    """ Fetch the values of the given columns for a single row.

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a '/' before the lookup.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list whose first element is a status marker. When columns are found,
      their values follow in the order of column_names. Otherwise a
      description ('Unable to fetch entity' or 'Not found') is appended to the
      marker and no values follow.
    Raises:
      KeyError: Some columns were found, but not every requested one.
    """
    response = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))

    cql = (
        ' SELECT * FROM "{table}" '
        'WHERE {key} = %(key)s '
        'AND {column} IN %(columns)s '
    ).format(table=table_name,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME)
    query = SimpleStatement(cql, retry_policy=self.retry_policy)
    parameters = {'key': full_key, 'columns': ValueSequence(column_names)}

    try:
        rows = self.session.execute(query, parameters)
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        response[0] += 'Unable to fetch entity'
        return response

    # Each result row is a (key, column, value) triple; index values by column.
    values_by_column = {}
    for (_, column, value) in rows:
        values_by_column[column] = value

    if not values_by_column:
        response[0] += 'Not found'
        return response

    response.extend([values_by_column[name] for name in column_names])
    return response
def create_groups_table(session):
    """ Create the group_updates table if it does not exist yet.

    Args:
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut: The CREATE statement timed out. The error is
        re-raised after a 60 second sleep.
    """
    group_updates_cql = (
        ' CREATE TABLE IF NOT EXISTS group_updates ( '
        'group blob PRIMARY KEY, '
        'last_update int '
        ') '
    )
    create_group_updates = SimpleStatement(group_updates_cql,
                                           retry_policy=NO_RETRIES)
    try:
        session.execute(create_group_updates)
    except cassandra.OperationTimedOut:
        settle_notice = (
            'Encountered an operation timeout while creating group_updates table. '
            'Waiting 1 minute for schema to settle.'
        )
        logging.warning(settle_notice)
        time.sleep(60)
        raise
def delete_row(self, table_name, row_key):
    """ Delete a row from a table (generator-style coroutine body).

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a '/' before the delete.
    Returns:
      Via gen.Return, a list whose first element is a status marker. On success
      '0' is appended as a second element; on a transient Cassandra error
      'Unable to delete row' is appended to the marker instead.
    """
    outcome = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))

    cql = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
        table=table_name, key=ThriftColumn.KEY)
    delete_statement = SimpleStatement(cql, retry_policy=BASIC_RETRIES)

    try:
        yield self.tornado_cassandra.execute(delete_statement, (full_key,))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        outcome[0] += 'Unable to delete row'
    else:
        outcome.append('0')

    raise gen.Return(outcome)
def delete_row(self, table_name, row_key):
    """ Delete a row from a table.

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a '/' before the delete.
    Returns:
      A list whose first element is a status marker. On success '0' is appended
      as a second element; on a transient Cassandra error 'Unable to delete
      row' is appended to the marker instead.
    """
    outcome = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))

    cql = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
        table=table_name, key=ThriftColumn.KEY)
    delete_statement = SimpleStatement(cql, retry_policy=self.retry_policy)

    try:
        self.session.execute(delete_statement, (full_key,))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        outcome[0] += 'Unable to delete row'
    else:
        outcome.append('0')

    return outcome
def create_groups_table(session):
    """ Create the group_updates table if it does not exist yet.

    Args:
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut: The CREATE statement timed out. The error is
        re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    group_updates_cql = (
        ' CREATE TABLE IF NOT EXISTS group_updates ( '
        'group blob PRIMARY KEY, '
        'last_update bigint '
        ') '
    )
    create_group_updates = SimpleStatement(group_updates_cql,
                                           retry_policy=NO_RETRIES)
    try:
        session.execute(create_group_updates, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
        settle_notice = (
            'Encountered an operation timeout while creating group_updates table. '
            'Waiting {} seconds for schema to settle.'
        ).format(SCHEMA_CHANGE_TIMEOUT)
        logger.warning(settle_notice)
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise
def delete_row(self, table_name, row_key):
    """ Delete a row from a table.

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a '/' before the delete.
    Returns:
      A list whose first element is a status marker. On success '0' is appended
      as a second element; when Cassandra is unavailable or times out, 'Unable
      to delete row' is appended to the marker instead.
    """
    outcome = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))

    cql = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
        table=table_name, key=ThriftColumn.KEY)
    delete_statement = SimpleStatement(cql, retry_policy=self.retry_policy)

    try:
        self.session.execute(delete_statement, (full_key,))
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        outcome[0] += 'Unable to delete row'
    else:
        outcome.append('0')

    return outcome
def get_table(self, table_name, column_names):
    """ Fetch a list of values for the given columns in a table.

    Args:
      table_name: A string containing the name of the table.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list containing a status marker followed by the values, row by row in
      the order of column_names. If the query fails or a row lacks one of the
      columns, a description is appended to the marker and the list is returned
      as built so far.
    Note:
      The response does not contain any row keys or column names.
    """
    response = [ERROR_DEFAULT]
    cql = 'SELECT * FROM "{table}"'.format(table=table_name)
    query = SimpleStatement(cql, retry_policy=self.retry_policy)

    try:
        rows = self.session.execute(query)
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        response[0] += 'Unable to fetch table contents'
        return response

    # Fold consecutive (key, column, value) rows that share a key into one
    # {column: value} dict per key.
    grouped_rows = []
    active_key = None
    active_columns = {}
    for (key, column, value) in rows:
        if key != active_key:
            if active_columns:
                grouped_rows.append(active_columns)
            active_columns = {}
            active_key = key
        active_columns[column] = value
    if active_columns:
        grouped_rows.append(active_columns)

    for row_columns in grouped_rows:
        for name in column_names:
            if name not in row_columns:
                response[0] += 'Table contents did not match schema'
                return response
            response.append(row_columns[name])

    return response
def get_table(self, table_name, column_names):
    """ Fetch a list of values for the given columns in a table.

    This is a generator-style coroutine body: the response is delivered by
    raising gen.Return.

    Args:
      table_name: A string containing the name of the table.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list containing a status marker followed by the values, row by row in
      the order of column_names. If the query fails or a row lacks one of the
      columns, a description is appended to the marker and the list is returned
      as built so far.
    Note:
      The response does not contain any row keys or column names.
    """
    response = [ERROR_DEFAULT]
    cql = 'SELECT * FROM "{table}"'.format(table=table_name)
    query = SimpleStatement(cql, retry_policy=BASIC_RETRIES)

    try:
        rows = yield self.tornado_cassandra.execute(query)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        response[0] += 'Unable to fetch table contents'
        raise gen.Return(response)

    # Fold consecutive (key, column, value) rows that share a key into one
    # {column: value} dict per key.
    grouped_rows = []
    active_key = None
    active_columns = {}
    for (key, column, value) in rows:
        if key != active_key:
            if active_columns:
                grouped_rows.append(active_columns)
            active_columns = {}
            active_key = key
        active_columns[column] = value
    if active_columns:
        grouped_rows.append(active_columns)

    for row_columns in grouped_rows:
        for name in column_names:
            if name not in row_columns:
                response[0] += 'Table contents did not match schema'
                raise gen.Return(response)
            response.append(row_columns[name])

    raise gen.Return(response)
def create_entity_ids_table(session):
    """ Create the reserved_ids table if it does not exist yet.

    Args:
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut: The CREATE statement timed out. The error is
        re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    reserved_ids_cql = (
        ' CREATE TABLE IF NOT EXISTS reserved_ids ( '
        'project text, '
        'scattered boolean, '
        'last_reserved bigint, '
        'op_id uuid, '
        'PRIMARY KEY ((project, scattered)) '
        ') '
    )
    create_reserved_ids = SimpleStatement(reserved_ids_cql,
                                          retry_policy=NO_RETRIES)
    try:
        session.execute(create_reserved_ids, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
        settle_notice = (
            'Encountered an operation timeout while creating entity_ids table. '
            'Waiting {} seconds for schema to settle.'
        ).format(SCHEMA_CHANGE_TIMEOUT)
        logger.warning(settle_notice)
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

    The steps run in this order: connect to ZooKeeper and Cassandra, create the
    keyspace, wait for connections to every database host, create the initial
    and auxiliary tables, then record version, index-state and "primed" markers
    in the datastore metadata table.

    Args:
      replication: An integer specifying the replication factor for the keyspace.
    Raises:
      AppScaleBadArg if replication factor is not greater than 0.
      TypeError if replication is not an integer.
      cassandra.cluster.NoHostAvailable if no database host can be reached
        after INITIAL_CONNECT_RETRIES retries.
      cassandra.OperationTimedOut if creating one of the initial tables times
        out (re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds).
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) <= 0:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    # The ZooKeeper client is handed to migrate_composite_index_metadata below.
    zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips())
    zk_client.start()

    hosts = appscale_info.get_db_ips()

    # Retry the initial connection, pausing 3 seconds between attempts, until
    # it succeeds or the retry budget is used up.
    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable as connection_error:
            remaining_retries -= 1
            if remaining_retries < 0:
                raise connection_error
            time.sleep(3)

    # Statements on this session default to QUORUM consistency.
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """ CREATE KEYSPACE IF NOT EXISTS "{keyspace}" WITH REPLICATION = %(replication)s """.format(keyspace=KEYSPACE)
    keyspace_replication = {
        'class': 'SimpleStrategy',
        'replication_factor': replication
    }
    session.execute(create_keyspace, {'replication': keyspace_replication},
                    timeout=SCHEMA_CHANGE_TIMEOUT)
    session.set_keyspace(KEYSPACE)

    # Poll once per second until the session has a pool for every configured
    # host, giving up (and carrying on) after SCHEMA_CHANGE_TIMEOUT seconds.
    logger.info('Waiting for all hosts to be connected')
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    while True:
        if time.time() > deadline:
            logger.warning(
                'Timeout when waiting for hosts to join. Continuing '
                'with connected hosts.')
            break

        if len(session.get_pool_state()) == len(hosts):
            break

        time.sleep(1)

    # Every initial table shares the same key/column/value layout.
    for table in dbconstants.INITIAL_TABLES:
        create_table = """ CREATE TABLE IF NOT EXISTS "{table}" ( {key} blob, {column} text, {value} blob, PRIMARY KEY ({key}, {column}) ) WITH COMPACT STORAGE """.format(table=table,
                       key=ThriftColumn.KEY,
                       column=ThriftColumn.COLUMN_NAME,
                       value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logger.info('Trying to create {}'.format(table))
        try:
            session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            # Wait before propagating the error so the schema can settle.
            logger.warning(
                'Encountered an operation timeout while creating {} table. Waiting {} '
                'seconds for schema to settle.'.format(table, SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    migrate_composite_index_metadata(cluster, session, zk_client)
    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_entity_ids_table(session)

    # A single returned row is enough to know the entity table is not empty.
    first_entity = session.execute('SELECT * FROM "{}" LIMIT 1'.format(
        dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """ INSERT INTO "{table}" ({key}, {column}, {value}) VALUES (%(key)s, %(column)s, %(value)s) """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
                      key=ThriftColumn.KEY,
                      column=ThriftColumn.COLUMN_NAME,
                      value=ThriftColumn.VALUE)

    if existing_entities:
        current_version = current_datastore_version(session)
        if current_version == 1.0:
            # Instruct the groomer to reclean the indexes.
            parameters = {
                'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
                'column': cassandra_interface.INDEX_STATE_KEY,
                'value': bytearray(str(IndexStates.DIRTY))
            }
            session.execute(metadata_insert, parameters)

            # Record the current version now that the indexes are flagged.
            parameters = {
                'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
                'column': cassandra_interface.VERSION_INFO_KEY,
                'value': bytearray(str(CURRENT_VERSION))
            }
            session.execute(metadata_insert, parameters)
    else:
        # No entities exist yet: record the current version.
        parameters = {
            'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
            'column': cassandra_interface.VERSION_INFO_KEY,
            'value': bytearray(str(CURRENT_VERSION))
        }
        session.execute(metadata_insert, parameters)

        # Mark the newly created indexes as clean.
        parameters = {
            'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
            'column': cassandra_interface.INDEX_STATE_KEY,
            'value': bytearray(str(IndexStates.CLEAN))
        }
        session.execute(metadata_insert, parameters)

        # Indicate that scatter property values do not need to be populated.
        parameters = {
            'key': bytearray(cassandra_interface.SCATTER_PROP_KEY),
            'column': cassandra_interface.SCATTER_PROP_KEY,
            'value': bytearray(ScatterPropStates.POPULATED)
        }
        session.execute(metadata_insert, parameters)

    # Indicate that the database has been successfully primed.
    parameters = {
        'key': bytearray(cassandra_interface.PRIMED_KEY),
        'column': cassandra_interface.PRIMED_KEY,
        'value': bytearray(str(CURRENT_VERSION))
    }
    session.execute(metadata_insert, parameters)
    logger.info('Cassandra is primed.')
def test1():
    """ Iterate over every row of test_tweet_select, discarding the rows.

    The query runs on the module-level session with the 'node1' execution
    profile and a fetch size of 10000.
    """
    paged_query = SimpleStatement("SELECT * FROM test_tweet_select",
                                  fetch_size=10000)
    rows = session.execute(paged_query, execution_profile='node1')
    for _ in rows:
        pass
def create_pull_queue_tables(cluster, session):
    """ Create the required tables for pull queues.

    In order: the pull_queue_tasks table (adding the op_id column when an
    existing table lacks it), the pull_queue_tasks_index table, secondary
    indexes on that table's tag and tag_exists columns, and the
    pull_queue_leases table.

    Args:
      cluster: A cassandra-driver cluster.
      session: A cassandra-driver session.
    Raises:
      OperationTimedOut: A schema statement timed out. The error is re-raised
        after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
      InvalidRequest: Creating one of the secondary indexes was rejected. The
        error is re-raised after the same sleep.
    """
    logger.info('Trying to create pull_queue_tasks')
    create_table = """ CREATE TABLE IF NOT EXISTS pull_queue_tasks ( app text, queue text, id text, payload text, enqueued timestamp, lease_expires timestamp, retry_count int, tag text, op_id uuid, PRIMARY KEY ((app, queue, id)) ) """
    statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_tasks. '
            'Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    # CREATE TABLE IF NOT EXISTS leaves an existing table untouched, so a
    # table without op_id has the column added separately.
    keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE]
    if 'op_id' not in keyspace_metadata.tables['pull_queue_tasks'].columns:
        try:
            session.execute('ALTER TABLE pull_queue_tasks ADD op_id uuid',
                            timeout=SCHEMA_CHANGE_TIMEOUT)
        except OperationTimedOut:
            logger.warning(
                'Encountered a timeout when altering pull_queue_tasks. Waiting {} '
                'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    logger.info('Trying to create pull_queue_tasks_index')
    create_index_table = """ CREATE TABLE IF NOT EXISTS pull_queue_tasks_index ( app text, queue text, eta timestamp, id text, tag text, tag_exists boolean, PRIMARY KEY ((app, queue, eta), id) ) WITH gc_grace_seconds = 120 """
    statement = SimpleStatement(create_index_table, retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_tasks_index.'
            ' Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    logger.info('Trying to create pull_queue_tags index')
    create_index = """ CREATE INDEX IF NOT EXISTS pull_queue_tags ON pull_queue_tasks_index (tag); """
    try:
        session.execute(create_index, timeout=SCHEMA_CHANGE_TIMEOUT)
    except (OperationTimedOut, InvalidRequest):
        logger.warning(
            'Encountered error while creating pull_queue_tags index. Waiting {} '
            'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    # This additional index is needed for groupByTag=true,tag=None queries
    # because Cassandra can only do '=' queries on secondary indices.
    logger.info('Trying to create pull_queue_tag_exists index')
    create_index = """ CREATE INDEX IF NOT EXISTS pull_queue_tag_exists ON pull_queue_tasks_index (tag_exists); """
    try:
        session.execute(create_index, timeout=SCHEMA_CHANGE_TIMEOUT)
    except (OperationTimedOut, InvalidRequest):
        logger.warning(
            'Encountered error while creating pull_queue_tag_exists index. '
            'Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    logger.info('Trying to create pull_queue_leases')
    create_leases_table = """ CREATE TABLE IF NOT EXISTS pull_queue_leases ( app text, queue text, leased timestamp, PRIMARY KEY ((app, queue, leased)) ) WITH gc_grace_seconds = 120 """
    statement = SimpleStatement(create_leases_table, retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_leases. '
            'Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

    The steps run in this order: connect to Cassandra, create the keyspace,
    create the initial and auxiliary tables, then record version, index-state
    and "primed" markers in the datastore metadata table.

    Args:
      replication: An integer specifying the replication factor for the keyspace.
    Raises:
      AppScaleBadArg if replication factor is not greater than 0.
      TypeError if replication is not an integer.
      cassandra.cluster.NoHostAvailable if no database host can be reached
        after INITIAL_CONNECT_RETRIES retries.
      cassandra.OperationTimedOut if creating one of the initial tables times
        out (re-raised after a 60 second sleep).
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) <= 0:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    hosts = appscale_info.get_db_ips()

    cluster = None
    session = None

    # Retry the initial connection, pausing 3 seconds between attempts, until
    # it succeeds or the retry budget is used up.
    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(hosts)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable as connection_error:
            remaining_retries -= 1
            if remaining_retries < 0:
                raise connection_error
            time.sleep(3)

    # Statements on this session default to QUORUM consistency.
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """ CREATE KEYSPACE IF NOT EXISTS "{keyspace}" WITH REPLICATION = %(replication)s """.format(keyspace=KEYSPACE)
    keyspace_replication = {
        'class': 'SimpleStrategy',
        'replication_factor': replication
    }
    session.execute(create_keyspace, {'replication': keyspace_replication})
    session.set_keyspace(KEYSPACE)

    # Every initial table shares the same key/column/value layout.
    for table in dbconstants.INITIAL_TABLES:
        create_table = """ CREATE TABLE IF NOT EXISTS "{table}" ( {key} blob, {column} text, {value} blob, PRIMARY KEY ({key}, {column}) ) WITH COMPACT STORAGE """.format(table=table,
                       key=ThriftColumn.KEY,
                       column=ThriftColumn.COLUMN_NAME,
                       value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logging.info('Trying to create {}'.format(table))
        try:
            session.execute(statement)
        except cassandra.OperationTimedOut:
            # Wait before propagating the error so the schema can settle.
            logging.warning(
                'Encountered an operation timeout while creating {} table. '
                'Waiting 1 minute for schema to settle.'.format(table))
            time.sleep(60)
            raise

    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_pull_queue_tables(cluster, session)

    # A single returned row is enough to know the entity table is not empty.
    first_entity = session.execute('SELECT * FROM "{}" LIMIT 1'.format(
        dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """ INSERT INTO "{table}" ({key}, {column}, {value}) VALUES (%(key)s, %(column)s, %(value)s) """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
                      key=ThriftColumn.KEY,
                      column=ThriftColumn.COLUMN_NAME,
                      value=ThriftColumn.VALUE)

    # Version and index-state markers are only written when no entities exist.
    if not existing_entities:
        parameters = {
            'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
            'column': cassandra_interface.VERSION_INFO_KEY,
            'value': bytearray(str(POST_JOURNAL_VERSION))
        }
        session.execute(metadata_insert, parameters)

        # Mark the newly created indexes as clean.
        parameters = {
            'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
            'column': cassandra_interface.INDEX_STATE_KEY,
            'value': bytearray(str(IndexStates.CLEAN))
        }
        session.execute(metadata_insert, parameters)

    # Indicate that the database has been successfully primed.
    parameters = {
        'key': bytearray(cassandra_interface.PRIMED_KEY),
        'column': cassandra_interface.PRIMED_KEY,
        'value': bytearray('true')
    }
    session.execute(metadata_insert, parameters)
    logging.info('Cassandra is primed.')
def create_pull_queue_tables(cluster, session):
    """ Create the required tables for pull queues.

    In order: the pull_queue_tasks table (adding the op_id column when an
    existing table lacks it), removal of an outdated pull_queue_tasks_index
    table and its secondary indexes, the pull_queue_eta_index and
    pull_queue_tags_index tables, a rebuild of the index entries when the
    outdated table was removed, and the pull_queue_leases table.

    Args:
      cluster: A cassandra-driver cluster.
      session: A cassandra-driver session.
    Raises:
      OperationTimedOut: A CREATE or ALTER statement timed out. The error is
        re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    logger.info('Trying to create pull_queue_tasks')
    create_table = """ CREATE TABLE IF NOT EXISTS pull_queue_tasks ( app text, queue text, id text, payload text, enqueued timestamp, lease_expires timestamp, retry_count int, tag text, op_id uuid, PRIMARY KEY ((app, queue, id)) ) """
    statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_tasks. '
            'Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    # CREATE TABLE IF NOT EXISTS leaves an existing table untouched, so a
    # table without op_id has the column added separately.
    keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE]
    if 'op_id' not in keyspace_metadata.tables['pull_queue_tasks'].columns:
        try:
            session.execute('ALTER TABLE pull_queue_tasks ADD op_id uuid',
                            timeout=SCHEMA_CHANGE_TIMEOUT)
        except OperationTimedOut:
            logger.warning(
                'Encountered a timeout when altering pull_queue_tasks. Waiting {} '
                'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    # A pull_queue_tasks_index table with a tag_exists column is treated as
    # outdated: it and its indexes are dropped, and the index entries are
    # rebuilt further down once the replacement tables exist.
    rebuild_indexes = False
    if ('pull_queue_tasks_index' in keyspace_metadata.tables and
            'tag_exists' in keyspace_metadata.tables['pull_queue_tasks_index'].columns):
        rebuild_indexes = True
        logger.info('Dropping outdated pull_queue_tags index')
        session.execute('DROP INDEX IF EXISTS pull_queue_tags',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

        logger.info('Dropping outdated pull_queue_tag_exists index')
        session.execute('DROP INDEX IF EXISTS pull_queue_tag_exists',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

        logger.info('Dropping outdated pull_queue_tasks_index table')
        session.execute('DROP TABLE pull_queue_tasks_index',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

    logger.info('Trying to create pull_queue_eta_index')
    create_index_table = """ CREATE TABLE IF NOT EXISTS pull_queue_eta_index ( app text, queue text, eta timestamp, id text, tag text, PRIMARY KEY ((app, queue, eta, id)) ) WITH gc_grace_seconds = 120 """
    statement = SimpleStatement(create_index_table, retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_eta_index.'
            ' Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    logger.info('Trying to create pull_queue_tags_index')
    create_tags_index_table = """ CREATE TABLE IF NOT EXISTS pull_queue_tags_index ( app text, queue text, tag text, eta timestamp, id text, PRIMARY KEY ((app, queue, tag, eta, id)) ) WITH gc_grace_seconds = 120 """
    statement = SimpleStatement(create_tags_index_table,
                                retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_tags_index.'
            ' Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise

    # Only repopulate the new index tables when the outdated one was dropped.
    if rebuild_indexes:
        rebuild_task_indexes(session)

    logger.info('Trying to create pull_queue_leases')
    create_leases_table = """ CREATE TABLE IF NOT EXISTS pull_queue_leases ( app text, queue text, leased timestamp, PRIMARY KEY ((app, queue, leased)) ) WITH gc_grace_seconds = 120 """
    statement = SimpleStatement(create_leases_table, retry_policy=NO_RETRIES)
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except OperationTimedOut:
        logger.warning(
            'Encountered an operation timeout while creating pull_queue_leases. '
            'Waiting {} seconds for schema to settle.'.format(
                SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise