def __init__(self, log_level=logging.INFO, hosts=None):
    """ Sets up the instance logger and opens a Cassandra session.

    Args:
      log_level: The logging level applied to this instance's logger.
      hosts: An optional list of database hosts. When None, the hosts are
        looked up with appscale_info.get_db_ips().
    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    name = self.__class__.__name__
    self.logger = logging.getLogger(name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(name))

    self.hosts = appscale_info.get_db_ips() if hosts is None else hosts

    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
        try:
            self.cluster = Cluster(self.hosts,
                                   default_retry_policy=BASIC_RETRIES)
            self.session = self.cluster.connect(KEYSPACE)
            connected = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            retries_left -= 1
            if retries_left < 0:
                raise connection_error
            # Pause before the next connection attempt.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}
def __init__(self, log_level=logging.INFO, hosts=None):
    """ Configures logging and connects to the Cassandra keyspace.

    Args:
      log_level: The logging level applied to this instance's logger.
      hosts: An optional list of database hosts. When None, the hosts are
        looked up with appscale_info.get_db_ips().
    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    logger_name = self.__class__.__name__
    self.logger = logging.getLogger(logger_name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(logger_name))

    if hosts is None:
        hosts = appscale_info.get_db_ips()
    self.hosts = hosts

    attempts_remaining = INITIAL_CONNECT_RETRIES
    session_ready = False
    while not session_ready:
        try:
            self.cluster = Cluster(self.hosts,
                                   default_retry_policy=BASIC_RETRIES)
            self.session = self.cluster.connect(KEYSPACE)
            session_ready = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            attempts_remaining -= 1
            if attempts_remaining < 0:
                raise connection_error
            # Wait a little before trying to connect again.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}
def backup_data(path, keyname):
    """ Snapshots Cassandra on each DB machine and archives the snapshot files.

    Every machine is first asked for a fresh snapshot and checked for enough
    free space; only when all machines pass are the tar archives created.

    Args:
      path: A string containing the location to store the backup on each of
        the DB machines.
      keyname: A string containing the deployment's keyname.
    Raises:
      BRException if unable to find any Cassandra machines or if DB machine
      has insufficient space.
    """
    logger.info("Starting new db backup.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    for db_ip in db_ips:
        # Replace any previous snapshot with a fresh one.
        for nodetool_action in ('{} clearsnapshot', '{} snapshot'):
            appscale_utils.ssh(db_ip, keyname,
                               nodetool_action.format(NODE_TOOL))

        # Sum the first column of each `du -s` output line.
        snapshot_size_cmd = (
            r'find {0} -name "snapshots" -exec du -s {{}} \;'.format(
                APPSCALE_DATA_DIR))
        du_output = appscale_utils.ssh(db_ip, keyname, snapshot_size_cmd,
                                       method=subprocess.check_output)
        snapshot_sizes = [int(line.split()[0])
                          for line in du_output.split('\n') if line]
        backup_size = sum(snapshot_sizes)

        # Check free space on the directory that will hold the archive.
        output_dir = '/'.join(path.split('/')[:-1]) + '/'
        df_output = appscale_utils.ssh(db_ip, keyname,
                                       'df {}'.format(output_dir),
                                       method=subprocess.check_output)
        # Fourth field of the second line of the `df` output.
        available = int(df_output.split('\n')[1].split()[3])

        usable = available * PADDING_PERCENTAGE
        if backup_size > usable:
            raise BRException('{} has insufficient space: {}/{}'.format(
                db_ip, usable, backup_size))

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        find_part = 'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '
        tar_part = r'--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'
        create_tar = (find_part + tar_part).format(path)
        appscale_utils.ssh(db_ip, keyname,
                           'cd {} && {}'.format(cassandra_dir, create_tar))

    logger.info("Done with db backup.")
def __init__(self):
    """ Opens a Cassandra session on KEYSPACE with QUORUM consistency.

    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    db_hosts = appscale_info.get_db_ips()

    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
        try:
            cluster = Cluster(db_hosts)
            self.session = cluster.connect(keyspace=KEYSPACE)
            connected = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            retries_left -= 1
            if retries_left < 0:
                raise connection_error
            # Pause before the next connection attempt.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
def backup_data(path, keyname):
    """ Takes a Cassandra snapshot on every DB machine and tars it up.

    All machines are snapshotted and checked for free space first; the tar
    archives are only created after every machine passed the space check.

    Args:
      path: A string containing the location to store the backup on each of
        the DB machines.
      keyname: A string containing the deployment's keyname.
    Raises:
      BRException if unable to find any Cassandra machines or if DB machine
      has insufficient space.
    """
    logging.info("Starting new db backup.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    for db_ip in db_ips:
        # Drop the old snapshot, then take a new one.
        clear_cmd = '{} clearsnapshot'.format(NODE_TOOL)
        appscale_utils.ssh(db_ip, keyname, clear_cmd)
        snapshot_cmd = '{} snapshot'.format(NODE_TOOL)
        appscale_utils.ssh(db_ip, keyname, snapshot_cmd)

        size_template = r'find {0} -name "snapshots" -exec du -s {{}} \;'
        get_snapshot_size = size_template.format(APPSCALE_DATA_DIR)
        du_output = appscale_utils.ssh(db_ip, keyname, get_snapshot_size,
                                       method=subprocess.check_output)
        # Add up the leading size field of each non-empty `du` line.
        backup_size = 0
        for du_line in du_output.split('\n'):
            if du_line:
                backup_size += int(du_line.split()[0])

        # Everything up to the last '/' of path, with a trailing slash.
        parent_parts = path.split('/')[:-1]
        output_dir = '/'.join(parent_parts) + '/'
        df_output = appscale_utils.ssh(db_ip, keyname,
                                       'df {}'.format(output_dir),
                                       method=subprocess.check_output)
        df_fields = df_output.split('\n')[1].split()
        available = int(df_fields[3])

        padded_available = available * PADDING_PERCENTAGE
        if backup_size > padded_available:
            message = '{} has insufficient space: {}/{}'.format(
                db_ip, padded_available, backup_size)
            raise BRException(message)

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        tar_template = (
            'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '
            r'--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +')
        create_tar = tar_template.format(path)
        remote_cmd = 'cd {} && {}'.format(cassandra_dir, create_tar)
        appscale_utils.ssh(db_ip, keyname, remote_cmd)

    logging.info("Done with db backup.")
def main():
    """ Main.

    Parses command-line flags, configures logging and signal handlers,
    works out which roles this machine has, and serves the stats API with
    Tornado. The call blocks inside the IOLoop until the loop is stopped.
    """
    # Declared up front so the assignment below rebinds the module-level name.
    global zk_client

    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Output debug-level logging')
    args = parser.parse_args()

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    options.define('secret', appscale_info.get_secret())

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    my_ip = appscale_info.get_private_ip()
    is_master = (my_ip == appscale_info.get_headnode_ip())
    is_lb = (my_ip in appscale_info.get_load_balancer_ips())
    is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
    is_db = (my_ip in appscale_info.get_db_ips())

    if is_master:
        zk_client = KazooClient(
            hosts=','.join(appscale_info.get_zk_node_ips()),
            connection_retry=ZK_PERSISTENT_RECONNECTS)
        zk_client.start()
        # Start watching profiling configs in ZooKeeper
        stats_app.ProfilingManager(zk_client)

    app = tornado.web.Application(
        stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db)
        + stats_app.get_cluster_stats_api_routes(is_master),
        debug=False)
    app.listen(constants.HERMES_PORT)

    # Log before entering the loop: IOLoop.start() blocks until the loop is
    # stopped, so a message placed after it would only appear at shutdown.
    logger.info("Hermes is up and listening on port: {}.".format(
        constants.HERMES_PORT))

    # Start loop for accepting http requests.
    IOLoop.instance().start()
def main():
    """ Main.

    Parses command-line flags, configures logging and signal handlers,
    works out which roles this machine has, and serves the stats API with
    Tornado. The call blocks inside the IOLoop until the loop is stopped.
    """
    # Declared up front so the assignment below rebinds the module-level name.
    global zk_client

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Output debug-level logging')
    args = parser.parse_args()

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    options.define('secret', appscale_info.get_secret())

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    my_ip = appscale_info.get_private_ip()
    is_master = (my_ip == appscale_info.get_headnode_ip())
    is_lb = (my_ip in appscale_info.get_load_balancer_ips())
    is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
    is_db = (my_ip in appscale_info.get_db_ips())

    if is_master:
        zk_client = KazooClient(
            hosts=','.join(appscale_info.get_zk_node_ips()),
            connection_retry=ZK_PERSISTENT_RECONNECTS)
        zk_client.start()
        # Start watching profiling configs in ZooKeeper
        stats_app.ProfilingManager(zk_client)

    app = tornado.web.Application(
        stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db)
        + stats_app.get_cluster_stats_api_routes(is_master),
        debug=False
    )
    app.listen(constants.HERMES_PORT)

    # Log before entering the loop: IOLoop.start() blocks until the loop is
    # stopped, so a message placed after it would only appear at shutdown.
    logger.info("Hermes is up and listening on port: {}."
                .format(constants.HERMES_PORT))

    # Start loop for accepting http requests.
    IOLoop.instance().start()
def main():
    """ Performs schema upgrades.

    Converts group_updates.last_update to a bigint column. The values are
    staged in a temporary column, the original column is dropped and
    re-added as bigint, the values are copied back, and the temporary
    column is dropped.

    Each step is chosen from the columns that currently exist, so a run
    that was interrupted part-way can be resumed by running this again.

    Raises:
      AssertionError if the table, or both the column and its temporary
      copy, are missing.
    """
    hosts = appscale_info.get_db_ips()
    cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
    session = cluster.connect(KEYSPACE)

    table = 'group_updates'
    column = 'last_update'
    temp_column = 'last_update_temp'
    key_column = 'group'

    tables = cluster.metadata.keyspaces[KEYSPACE].tables
    assert table in tables, 'The table {} was not found'.format(table)

    columns = tables[table].columns
    assert column in columns or temp_column in columns,\
        '{}.{} was not found'.format(table, column)

    column_exists = column in columns
    temp_exists = temp_column in columns
    needs_conversion = (column_exists and
                        columns[column].cql_type != 'bigint')

    if column_exists and not needs_conversion and not temp_exists:
        logging.info('{}.{} is already the correct type'.format(table, column))
        return

    if needs_conversion:
        if not temp_exists:
            logging.info('Adding new column with correct type')
            statement = 'ALTER TABLE {} ADD {} int'.format(table, temp_column)
            session.execute(statement)

        # Stage the existing values before the column is dropped.
        copy_column(session, table, key_column, column, temp_column)

        logging.info('Dropping {}.{}'.format(table, column))
        session.execute('ALTER TABLE {} DROP {}'.format(table, column))
        column_exists = False

    # Only add the column when it is absent: a resumed run may find it
    # already re-created as bigint while the temporary column still exists.
    if not column_exists:
        logging.info('Creating {}.{}'.format(table, column))
        session.execute('ALTER TABLE {} ADD {} bigint'.format(table, column))

    copy_column(session, table, key_column, temp_column, column)

    logging.info('Dropping {}.{}'.format(table, temp_column))
    session.execute('ALTER TABLE {} DROP {}'.format(table, temp_column))

    logging.info('Schema upgrade complete')
def main():
    """ Performs schema upgrades.

    Converts group_updates.last_update to a bigint column. The values are
    staged in a temporary column, the original column is dropped and
    re-added as bigint, the values are copied back, and the temporary
    column is dropped.

    Each step is chosen from the columns that currently exist, so a run
    that was interrupted part-way can be resumed by running this again.

    Raises:
      AssertionError if the table, or both the column and its temporary
      copy, are missing.
    """
    hosts = appscale_info.get_db_ips()
    cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
    session = cluster.connect(KEYSPACE)

    table = 'group_updates'
    column = 'last_update'
    temp_column = 'last_update_temp'
    key_column = 'group'

    tables = cluster.metadata.keyspaces[KEYSPACE].tables
    assert table in tables, 'The table {} was not found'.format(table)

    columns = tables[table].columns
    assert column in columns or temp_column in columns,\
        '{}.{} was not found'.format(table, column)

    column_exists = column in columns
    temp_exists = temp_column in columns
    needs_conversion = (column_exists and
                        columns[column].cql_type != 'bigint')

    if column_exists and not needs_conversion and not temp_exists:
        logger.info('{}.{} is already the correct type'.format(table, column))
        return

    if needs_conversion:
        if not temp_exists:
            logger.info('Adding new column with correct type')
            statement = 'ALTER TABLE {} ADD {} int'.format(table, temp_column)
            session.execute(statement)

        # Stage the existing values before the column is dropped.
        copy_column(session, table, key_column, column, temp_column)

        logger.info('Dropping {}.{}'.format(table, column))
        session.execute('ALTER TABLE {} DROP {}'.format(table, column))
        column_exists = False

    # Only add the column when it is absent: a resumed run may find it
    # already re-created as bigint while the temporary column still exists.
    if not column_exists:
        logger.info('Creating {}.{}'.format(table, column))
        session.execute('ALTER TABLE {} ADD {} bigint'.format(table, column))

    copy_column(session, table, key_column, temp_column, column)

    logger.info('Dropping {}.{}'.format(table, temp_column))
    session.execute('ALTER TABLE {} DROP {}'.format(table, temp_column))

    logger.info('Schema upgrade complete')
def __init__(self, log_level=logging.INFO, hosts=None):
    """ Sets up logging, connects to Cassandra and adds sync wrappers.

    Args:
      log_level: The logging level applied to this instance's logger.
      hosts: An optional list of database hosts. When None, the hosts are
        looked up with appscale_info.get_db_ips().
    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    name = self.__class__.__name__
    self.logger = logging.getLogger(name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(name))

    self.hosts = appscale_info.get_db_ips() if hosts is None else hosts

    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
        try:
            self.cluster = Cluster(self.hosts,
                                   default_retry_policy=BASIC_RETRIES,
                                   load_balancing_policy=LB_POLICY)
            self.session = self.cluster.connect(KEYSPACE)
            self.tornado_cassandra = TornadoCassandra(self.session)
            connected = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            retries_left -= 1
            if retries_left < 0:
                raise connection_error
            # Pause before the next connection attempt.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}

    # Provide synchronous version of some async methods: each method below
    # gets a "<name>_sync" attribute wrapping it with tornado_synchronous.
    async_method_names = (
        'batch_get_entity',
        'batch_put_entity',
        'batch_delete',
        'valid_data_version',
        'range_query',
        'get_metadata',
        'set_metadata',
        'get_indices',
        'delete_table',
    )
    for method_name in async_method_names:
        sync_version = tornado_synchronous(getattr(self, method_name))
        setattr(self, method_name + '_sync', sync_version)
def __init__(self):
    """ Connects to Cassandra and exposes a synchronous get_schema.

    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    db_hosts = appscale_info.get_db_ips()

    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
        try:
            cluster = Cluster(db_hosts, load_balancing_policy=LB_POLICY)
            self.session = cluster.connect(keyspace=KEYSPACE)
            self.tornado_cassandra = TornadoCassandra(self.session)
            connected = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            retries_left -= 1
            if retries_left < 0:
                raise connection_error
            # Pause before the next connection attempt.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM

    # Provide synchronous version of get_schema method
    get_schema_async = self.get_schema
    self.get_schema_sync = tornado_synchronous(get_schema_async)
def __init__(self):
    """ Opens the Cassandra session and adds a synchronous get_schema.

    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    cassandra_hosts = appscale_info.get_db_ips()

    attempts_remaining = INITIAL_CONNECT_RETRIES
    session_ready = False
    while not session_ready:
        try:
            cluster = Cluster(cassandra_hosts,
                              load_balancing_policy=LB_POLICY)
            self.session = cluster.connect(keyspace=KEYSPACE)
            self.tornado_cassandra = TornadoCassandra(self.session)
            session_ready = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            attempts_remaining -= 1
            if attempts_remaining < 0:
                raise connection_error
            # Wait a little before trying to connect again.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM

    # Provide synchronous version of get_schema method
    async_get_schema = self.get_schema
    self.get_schema_sync = tornado_synchronous(async_get_schema)
def __init__(self, log_level=logging.INFO, hosts=None):
    """ Configures logging, opens the Cassandra session, adds sync wrappers.

    Args:
      log_level: The logging level applied to this instance's logger.
      hosts: An optional list of database hosts. When None, the hosts are
        looked up with appscale_info.get_db_ips().
    Raises:
      cassandra.cluster.NoHostAvailable if the connection still fails once
      the retry budget (INITIAL_CONNECT_RETRIES) is used up.
    """
    logger_name = self.__class__.__name__
    self.logger = logging.getLogger(logger_name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(logger_name))

    if hosts is None:
        hosts = appscale_info.get_db_ips()
    self.hosts = hosts

    attempts_remaining = INITIAL_CONNECT_RETRIES
    session_ready = False
    while not session_ready:
        try:
            self.cluster = Cluster(self.hosts,
                                   default_retry_policy=BASIC_RETRIES,
                                   load_balancing_policy=LB_POLICY)
            self.session = self.cluster.connect(KEYSPACE)
            self.tornado_cassandra = TornadoCassandra(self.session)
            session_ready = True
        except cassandra.cluster.NoHostAvailable as connection_error:
            attempts_remaining -= 1
            if attempts_remaining < 0:
                raise connection_error
            # Wait a little before trying to connect again.
            time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}

    # Provide synchronous version of some async methods: every name listed
    # here gets a matching "<name>_sync" attribute.
    for async_name in ('batch_get_entity', 'batch_put_entity',
                       'batch_delete', 'valid_data_version', 'range_query',
                       'get_metadata', 'set_metadata', 'get_indices',
                       'delete_table'):
        wrapped = tornado_synchronous(getattr(self, async_name))
        setattr(self, async_name + '_sync', wrapped)
def main():
    """ Parses command-line flags and serves the Hermes stats API.

    Registers a GET route for every local and cluster stats handler that
    applies to this machine, then runs the web application on the requested
    port.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Output debug-level logging')
    arg_parser.add_argument(
        '--port', type=int, default=constants.HERMES_PORT,
        help='The port to listen on')
    cli_args = arg_parser.parse_args()

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    if cli_args.verbose:
        logging.getLogger('appscale').setLevel(logging.DEBUG)

    # Work out which roles this machine has.
    private_ip = appscale_info.get_private_ip()
    is_master = private_ip == appscale_info.get_headnode_ip()
    is_lb = private_ip in appscale_info.get_load_balancer_ips()
    is_tq = private_ip in appscale_info.get_taskqueue_nodes()
    is_db = private_ip in appscale_info.get_db_ips()

    app = web.Application(middlewares=[verify_secret_middleware])

    routes = []
    routes.extend(get_local_stats_api_routes(is_lb, is_tq, is_db))
    routes.extend(get_cluster_stats_api_routes(is_master))
    for url_path, request_handler in routes:
        app.router.add_get(url_path, request_handler)

    logger.info("Starting Hermes on port: {}.".format(cli_args.port))
    web.run_app(
        app,
        port=cli_args.port,
        access_log=logger,
        access_log_format='%a "%r" %s %bB %Tfs "%{User-Agent}i"')
def get_kind_averages(keys):
    """ Get an average size for each kind.

    Up to 50 randomly chosen entities are fetched and their value sizes are
    averaged per kind. Kinds for which nothing was fetched get an average
    of 0.

    Args:
      keys: A list of dictionaries containing keys.
    Returns:
      A dictionary listing the average size of each kind.
    """
    # Group entity keys by the first two key parts plus the kind name.
    entities_by_kind = {}
    for key_dict in keys:
        key = key_dict['key']
        if not is_entity(key):
            continue
        key_parts = key.split(KEY_DELIMITER)
        kind = key_parts[2].split(':')[0]
        kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
        stats = entities_by_kind.setdefault(
            kind_id, {'keys': [], 'size': 0, 'fetched': 0})
        stats['keys'].append(key)

    # Nothing to sample, so there is no need to open a connection at all.
    if not entities_by_kind:
        return {}

    for stats in entities_by_kind.values():
        shuffle(stats['keys'])

    select = (
        ' SELECT {value} FROM "{table}" '
        'WHERE {key}=%(key)s AND {column}=%(column)s '
    ).format(value=ThriftColumn.VALUE,
             table=APP_ENTITY_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME)
    kind_ids = list(entities_by_kind)

    hosts = appscale_info.get_db_ips()
    cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES,
                      load_balancing_policy=LB_POLICY)
    try:
        session = cluster.connect(KEYSPACE)

        # Issue all the reads first so they run concurrently.
        futures = []
        for _ in range(50):
            kind_id = choice(kind_ids)
            try:
                key = entities_by_kind[kind_id]['keys'].pop()
            except IndexError:
                # Every key of this kind has already been sampled.
                continue
            parameters = {'key': bytearray(key),
                          'column': APP_ENTITY_SCHEMA[0]}
            futures.append((kind_id,
                            session.execute_async(select, parameters)))

        for kind_id, future in futures:
            try:
                entity = future.result()[0].value
            except IndexError:
                # The query returned no rows for this key.
                continue
            entities_by_kind[kind_id]['size'] += len(entity)
            entities_by_kind[kind_id]['fetched'] += 1
    finally:
        # Release the driver's connections and threads; this cluster object
        # is private to this call.
        cluster.shutdown()

    kind_averages = {}
    for kind_id, stats in entities_by_kind.items():
        if stats['fetched']:
            kind_averages[kind_id] = stats['size'] // stats['fetched']
        else:
            kind_averages[kind_id] = 0
    return kind_averages
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

    Cassandra is stopped on every DB machine, its data files are replaced
    with the contents of the backup archive, and it is started again. The
    function then waits (up to SCHEMA_CHANGE_TIMEOUT seconds) for all nodes
    to be reported as up by nodetool.

    Args:
      path: A string containing the location on each of the DB machines to
        use for restoring data.
      keyname: A string containing the deployment's keyname.
      force: A boolean. When False and some machines lack the restore file,
        the user is asked whether to continue; when True no prompt is shown.
    Raises:
      BRException if unable to find any Cassandra machines or if Cassandra
      cannot be stopped or started on a machine.
    """
    def get_status(db_ip):
        """ Returns the monit status of the Cassandra watch on db_ip. """
        summary = appscale_utils.ssh(db_ip, keyname, 'monit summary',
                                     method=subprocess.check_output)
        return utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)

    logging.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = appscale_utils.ssh(db_ip, keyname, 'ls {}'.format(path),
                                       method=subprocess.call)
        if exit_code != ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logging.info('The following machines do not have a restore file: {}'.
                     format(machines_without_restore))
        response = raw_input('Would you like to continue? [y/N] ')
        if response not in ['Y', 'y']:
            return

    for db_ip in db_ips:
        logging.info('Stopping Cassandra on {}'.format(db_ip))
        status = get_status(db_ip)
        retries = SERVICE_RETRIES
        while status != MonitStates.UNMONITORED:
            appscale_utils.ssh(
                db_ip, keyname,
                'monit stop {}'.format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
            time.sleep(3)
            status = get_status(db_ip)
            # Check for success before spending a retry, so that succeeding
            # on the final attempt is not reported as a failure.
            if status == MonitStates.UNMONITORED:
                break
            retries -= 1
            if retries < 0:
                raise BRException('Unable to stop Cassandra')

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        logging.info('Restoring Cassandra data on {}'.format(db_ip))
        clear_db = r'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
            format(cassandra_dir)
        appscale_utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            appscale_utils.ssh(db_ip, keyname,
                               'tar xf {} -C {}'.format(path, cassandra_dir))
            appscale_utils.ssh(db_ip, keyname,
                               'chown -R cassandra {}'.format(cassandra_dir))

        logging.info('Starting Cassandra on {}'.format(db_ip))
        retries = SERVICE_RETRIES
        status = MonitStates.UNMONITORED
        while status != MonitStates.RUNNING:
            appscale_utils.ssh(
                db_ip, keyname,
                'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
            time.sleep(3)
            status = get_status(db_ip)
            # As above: a start that succeeds on the last attempt counts.
            if status == MonitStates.RUNNING:
                break
            retries -= 1
            if retries < 0:
                raise BRException('Unable to start Cassandra')

        appscale_utils.ssh(
            db_ip, keyname,
            'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME))

    logging.info('Waiting for Cassandra cluster to be ready')
    db_ip = db_ips[0]
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    while True:
        ready = True
        try:
            output = appscale_utils.ssh(
                db_ip, keyname, '{} status'.format(NODE_TOOL),
                method=subprocess.check_output)
            # Count the nodetool status lines that start with 'UN'.
            nodes_ready = len([line for line in output.split('\n')
                               if line.startswith('UN')])
            if nodes_ready < len(db_ips):
                ready = False
        except CalledProcessError:
            ready = False

        if ready:
            break

        if time.time() > deadline:
            logging.warning('Cassandra cluster still not ready.')
            break

        time.sleep(3)

    logging.info("Done with db restore.")
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

    Cassandra is stopped on every DB machine, its data files are replaced
    with the contents of the backup archive, and it is started again. The
    function then waits (up to SCHEMA_CHANGE_TIMEOUT seconds) for all nodes
    to be reported as up by nodetool.

    Args:
      path: A string containing the location on each of the DB machines to
        use for restoring data.
      keyname: A string containing the deployment's keyname.
      force: A boolean. When False and some machines lack the restore file,
        the user is asked whether to continue; when True no prompt is shown.
    Raises:
      BRException if unable to find any Cassandra machines or if Cassandra
      cannot be stopped or started on a machine.
    """
    def get_status_line(db_ip):
        """ Returns the Cassandra line of the service summary on db_ip.

        An empty string is returned when no line starts with the watch name.
        """
        summary = appscale_utils.ssh(db_ip, keyname, 'appscale-admin summary',
                                     method=subprocess.check_output)
        return next((line for line in summary.split('\n')
                     if line.startswith(CASSANDRA_MONIT_WATCH_NAME)), '')

    logger.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = appscale_utils.ssh(db_ip, keyname, 'ls {}'.format(path),
                                       method=subprocess.call)
        if exit_code != utils.ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logger.info(
            'The following machines do not have a restore file: {}'.format(
                machines_without_restore))
        response = raw_input('Would you like to continue? [y/N] ')
        if response not in ['Y', 'y']:
            return

    for db_ip in db_ips:
        logger.info('Stopping Cassandra on {}'.format(db_ip))
        status_line = get_status_line(db_ip)
        retries = SERVICE_RETRIES
        while MonitStates.UNMONITORED not in status_line:
            appscale_utils.ssh(
                db_ip, keyname,
                'appscale-stop-service {}'.format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
            time.sleep(3)
            status_line = get_status_line(db_ip)
            # Check for success before spending a retry, so that succeeding
            # on the final attempt is not reported as a failure.
            if MonitStates.UNMONITORED in status_line:
                break
            retries -= 1
            if retries < 0:
                raise BRException('Unable to stop Cassandra')

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        logger.info('Restoring Cassandra data on {}'.format(db_ip))
        clear_db = r'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
            format(cassandra_dir)
        appscale_utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            appscale_utils.ssh(db_ip, keyname,
                               'tar xf {} -C {}'.format(path, cassandra_dir))
            appscale_utils.ssh(db_ip, keyname,
                               'chown -R cassandra {}'.format(cassandra_dir))

        logger.info('Starting Cassandra on {}'.format(db_ip))
        retries = SERVICE_RETRIES
        status_line = MonitStates.UNMONITORED
        while MonitStates.RUNNING not in status_line:
            appscale_utils.ssh(
                db_ip, keyname,
                'appscale-start-service {}'.format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
            time.sleep(3)
            status_line = get_status_line(db_ip)
            # As above: a start that succeeds on the last attempt counts.
            if MonitStates.RUNNING in status_line:
                break
            retries -= 1
            if retries < 0:
                raise BRException('Unable to start Cassandra')

        appscale_utils.ssh(
            db_ip, keyname,
            'appscale-start-service {}'.format(CASSANDRA_MONIT_WATCH_NAME))

    logger.info('Waiting for Cassandra cluster to be ready')
    db_ip = db_ips[0]
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    while True:
        ready = True
        try:
            output = appscale_utils.ssh(db_ip, keyname,
                                        '{} status'.format(NODE_TOOL),
                                        method=subprocess.check_output)
            # Count the nodetool status lines that start with 'UN'.
            nodes_ready = len(
                [line for line in output.split('\n')
                 if line.startswith('UN')])
            if nodes_ready < len(db_ips):
                ready = False
        except CalledProcessError:
            ready = False

        if ready:
            break

        if time.time() > deadline:
            logger.warning('Cassandra cluster still not ready.')
            break

        time.sleep(3)

    logger.info("Done with db restore.")
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

    Connects to the database hosts (retrying while no host is available),
    creates the keyspace and the tables listed in dbconstants.INITIAL_TABLES,
    runs the other table/schema creation helpers, and finally writes the
    metadata rows that mark the database as primed.

    Args:
      replication: An integer specifying the replication factor for the
        keyspace.
    Raises:
      AppScaleBadArg if replication factor is not greater than 0.
      TypeError if replication is not an integer.
      cassandra.cluster.NoHostAvailable if no host can be reached once the
        retries are used up.
      cassandra.OperationTimedOut if creating one of the initial tables
        times out.
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) <= 0:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    hosts = appscale_info.get_db_ips()

    cluster = None
    session = None
    remaining_retries = INITIAL_CONNECT_RETRIES
    # Keep trying to connect, pausing 3 seconds between attempts, until the
    # retry budget is exhausted.
    while True:
        try:
            cluster = Cluster(hosts)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable as connection_error:
            remaining_retries -= 1
            if remaining_retries < 0:
                raise connection_error
            time.sleep(3)
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """ CREATE KEYSPACE IF NOT EXISTS "{keyspace}" WITH REPLICATION = %(replication)s """.format(keyspace=KEYSPACE)
    keyspace_replication = {
        'class': 'SimpleStrategy', 'replication_factor': replication
    }
    session.execute(create_keyspace, {'replication': keyspace_replication},
                    timeout=SCHEMA_CHANGE_TIMEOUT)
    session.set_keyspace(KEYSPACE)

    for table in dbconstants.INITIAL_TABLES:
        create_table = """ CREATE TABLE IF NOT EXISTS "{table}" ( {key} blob, {column} text, {value} blob, PRIMARY KEY ({key}, {column}) ) WITH COMPACT STORAGE """.format(table=table,
                   key=ThriftColumn.KEY,
                   column=ThriftColumn.COLUMN_NAME,
                   value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logging.info('Trying to create {}'.format(table))
        try:
            session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            # Wait for the schema to settle, then still propagate the
            # timeout to the caller.
            logging.warning(
                'Encountered an operation timeout while creating {} table. Waiting {} '
                'seconds for schema to settle.'.format(table,
                                                       SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_pull_queue_tables(cluster, session)
    create_entity_ids_table(session)

    # Check whether the entity table already holds at least one row.
    first_entity = session.execute('SELECT * FROM "{}" LIMIT 1'.format(
        dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """ INSERT INTO "{table}" ({key}, {column}, {value}) VALUES (%(key)s, %(column)s, %(value)s) """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)

    # The version and index-state rows are only written when the entity
    # table is empty.
    if not existing_entities:
        parameters = {
            'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
            'column': cassandra_interface.VERSION_INFO_KEY,
            'value': bytearray(str(POST_JOURNAL_VERSION))
        }
        session.execute(metadata_insert, parameters)

        # Mark the newly created indexes as clean.
        parameters = {
            'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
            'column': cassandra_interface.INDEX_STATE_KEY,
            'value': bytearray(str(IndexStates.CLEAN))
        }
        session.execute(metadata_insert, parameters)

    # Indicate that the database has been successfully primed.
    parameters = {
        'key': bytearray(cassandra_interface.PRIMED_KEY),
        'column': cassandra_interface.PRIMED_KEY,
        'value': bytearray('true')
    }
    session.execute(metadata_insert, parameters)
    logging.info('Cassandra is primed.')
def get_random_db_node():
    """ Picks one database machine at random.

    Returns:
      A single-element list holding one randomly chosen item from
      appscale_info.get_db_ips().
    """
    db_ips = appscale_info.get_db_ips()
    chosen_ip = random.choice(db_ips)
    return [chosen_ip]
def get_kind_averages(keys):
    """ Get an average size for each kind.

    Up to 50 randomly chosen entities are fetched and their value sizes are
    averaged per kind. Kinds for which nothing was fetched get an average
    of 0.

    Args:
      keys: A list of dictionaries containing keys.
    Returns:
      A dictionary listing the average size of each kind.
    """
    # Group entity keys by the first two key parts plus the kind name.
    entities_by_kind = {}
    for key_dict in keys:
        key = key_dict['key']
        if not is_entity(key):
            continue
        key_parts = key.split(KEY_DELIMITER)
        kind = key_parts[2].split(':')[0]
        kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
        stats = entities_by_kind.setdefault(
            kind_id, {'keys': [], 'size': 0, 'fetched': 0})
        stats['keys'].append(key)

    # Nothing to sample, so there is no need to open a connection at all.
    if not entities_by_kind:
        return {}

    for stats in entities_by_kind.values():
        shuffle(stats['keys'])

    select = (
        ' SELECT {value} FROM "{table}" '
        'WHERE {key}=%(key)s AND {column}=%(column)s '
    ).format(value=ThriftColumn.VALUE,
             table=APP_ENTITY_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME)
    kind_ids = list(entities_by_kind)

    hosts = appscale_info.get_db_ips()
    cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES)
    try:
        session = cluster.connect(KEYSPACE)

        # Issue all the reads first so they run concurrently.
        futures = []
        for _ in range(50):
            kind_id = choice(kind_ids)
            try:
                key = entities_by_kind[kind_id]['keys'].pop()
            except IndexError:
                # Every key of this kind has already been sampled.
                continue
            parameters = {'key': bytearray(key),
                          'column': APP_ENTITY_SCHEMA[0]}
            futures.append((kind_id,
                            session.execute_async(select, parameters)))

        for kind_id, future in futures:
            try:
                entity = future.result()[0].value
            except IndexError:
                # The query returned no rows for this key.
                continue
            entities_by_kind[kind_id]['size'] += len(entity)
            entities_by_kind[kind_id]['fetched'] += 1
    finally:
        # Release the driver's connections and threads; this cluster object
        # is private to this call.
        cluster.shutdown()

    kind_averages = {}
    for kind_id, stats in entities_by_kind.items():
        if stats['fetched']:
            kind_averages[kind_id] = stats['size'] // stats['fetched']
        else:
            kind_averages[kind_id] = 0
    return kind_averages
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

    Connects to ZooKeeper and to the database hosts (retrying while no host
    is available), creates the keyspace, waits for the session to have a
    pool for every host, creates the tables listed in
    dbconstants.INITIAL_TABLES, runs the migration and table/schema creation
    helpers, and finally writes the metadata rows that mark the database as
    primed.

    Args:
      replication: An integer specifying the replication factor for the
        keyspace.
    Raises:
      AppScaleBadArg if replication factor is not greater than 0.
      TypeError if replication is not an integer.
      cassandra.cluster.NoHostAvailable if no host can be reached once the
        retries are used up.
      cassandra.OperationTimedOut if creating one of the initial tables
        times out.
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) <= 0:
        raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

    zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips())
    zk_client.start()

    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    # Keep trying to connect, pausing 3 seconds between attempts, until the
    # retry budget is exhausted.
    while True:
        try:
            cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable as connection_error:
            remaining_retries -= 1
            if remaining_retries < 0:
                raise connection_error
            time.sleep(3)
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """ CREATE KEYSPACE IF NOT EXISTS "{keyspace}" WITH REPLICATION = %(replication)s """.format(keyspace=KEYSPACE)
    keyspace_replication = {'class': 'SimpleStrategy',
                            'replication_factor': replication}
    session.execute(create_keyspace, {'replication': keyspace_replication},
                    timeout=SCHEMA_CHANGE_TIMEOUT)
    session.set_keyspace(KEYSPACE)

    logger.info('Waiting for all hosts to be connected')
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    # Poll once a second until the pool state has one entry per host, or
    # give up at the deadline and carry on with whatever is connected.
    while True:
        if time.time() > deadline:
            logger.warning('Timeout when waiting for hosts to join. Continuing '
                           'with connected hosts.')
            break

        if len(session.get_pool_state()) == len(hosts):
            break

        time.sleep(1)

    for table in dbconstants.INITIAL_TABLES:
        create_table = """ CREATE TABLE IF NOT EXISTS "{table}" ( {key} blob, {column} text, {value} blob, PRIMARY KEY ({key}, {column}) ) WITH COMPACT STORAGE """.format(table=table,
                   key=ThriftColumn.KEY,
                   column=ThriftColumn.COLUMN_NAME,
                   value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logger.info('Trying to create {}'.format(table))
        try:
            session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            # Wait for the schema to settle, then still propagate the
            # timeout to the caller.
            logger.warning(
                'Encountered an operation timeout while creating {} table. Waiting {} '
                'seconds for schema to settle.'.format(table,
                                                       SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    migrate_composite_index_metadata(cluster, session, zk_client)
    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_pull_queue_tables(cluster, session)
    create_entity_ids_table(session)

    # Check whether the entity table already holds at least one row.
    first_entity = session.execute(
        'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """ INSERT INTO "{table}" ({key}, {column}, {value}) VALUES (%(key)s, %(column)s, %(value)s) """.format(
        table=dbconstants.DATASTORE_METADATA_TABLE,
        key=ThriftColumn.KEY,
        column=ThriftColumn.COLUMN_NAME,
        value=ThriftColumn.VALUE
    )

    if existing_entities:
        current_version = current_datastore_version(session)
        if current_version == 1.0:
            # Instruct the groomer to reclean the indexes.
            parameters = {'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
                          'column': cassandra_interface.INDEX_STATE_KEY,
                          'value': bytearray(str(IndexStates.DIRTY))}
            session.execute(metadata_insert, parameters)

            parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
                          'column': cassandra_interface.VERSION_INFO_KEY,
                          'value': bytearray(str(CURRENT_VERSION))}
            session.execute(metadata_insert, parameters)
    else:
        parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
                      'column': cassandra_interface.VERSION_INFO_KEY,
                      'value': bytearray(str(CURRENT_VERSION))}
        session.execute(metadata_insert, parameters)

        # Mark the newly created indexes as clean.
        parameters = {'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
                      'column': cassandra_interface.INDEX_STATE_KEY,
                      'value': bytearray(str(IndexStates.CLEAN))}
        session.execute(metadata_insert, parameters)

        # Indicate that scatter property values do not need to be populated.
        parameters = {'key': bytearray(cassandra_interface.SCATTER_PROP_KEY),
                      'column': cassandra_interface.SCATTER_PROP_KEY,
                      'value': bytearray(ScatterPropStates.POPULATED)}
        session.execute(metadata_insert, parameters)

    # Indicate that the database has been successfully primed.
    parameters = {'key': bytearray(cassandra_interface.PRIMED_KEY),
                  'column': cassandra_interface.PRIMED_KEY,
                  'value': bytearray(str(CURRENT_VERSION))}
    session.execute(metadata_insert, parameters)
    logger.info('Cassandra is primed.')
def get_random_db_node():
  """ Picks one database machine at random.

  Returns:
    A single-element list holding one address chosen at random from the
    list returned by appscale_info.get_db_ips().
  """
  db_ips = appscale_info.get_db_ips()
  chosen_ip = random.choice(db_ips)
  return [chosen_ip]
async def get_current(cls):
  """ Retrieves Cassandra status info by running the nodetool status command.

  Returns:
    An instance of CassandraStatsSnapshot.
  Raises:
    NodetoolStatusError if the subprocess does not finish within
    NODETOOL_STATUS_TIMEOUT, exits with a non-zero return code, or prints
    output that contains neither of the expected headers.
  """
  start = time.time()
  process = await asyncio.create_subprocess_shell(
    NODETOOL_STATUS_COMMAND,
    stdout=asyncio.subprocess.PIPE,
    stderr=asyncio.subprocess.PIPE
  )
  logger.info('Started subprocess `{}` (pid: {})'
              .format(NODETOOL_STATUS_COMMAND, process.pid))

  try:
    # Wait for the subprocess to finish
    stdout, stderr = await asyncio.wait_for(
      process.communicate(), NODETOOL_STATUS_TIMEOUT
    )
  except asyncio.TimeoutError:
    # wait_for() only cancels communicate(); the child itself would keep
    # running, so it is killed here before the error is reported.
    try:
      process.kill()
    except ProcessLookupError:
      # The child exited on its own between the timeout and the kill.
      pass
    raise NodetoolStatusError(
      'Timed out waiting for subprocess `{}` (pid: {})'
      .format(NODETOOL_STATUS_COMMAND, process.pid)
    )

  output = stdout.decode()
  error = stderr.decode()

  if error:
    logger.warning(error)
  if process.returncode != 0:
    raise NodetoolStatusError('Subprocess failed with return code {} ({})'
                              .format(process.returncode, error))

  known_db_nodes = set(appscale_info.get_db_ips())

  # The header decides which row pattern applies. Rows matched by the
  # single-node pattern are always reported with one token; the multi-node
  # pattern provides a 'tokens_num' group.
  if cls.SINGLENODE_HEADER_PATTERN.search(output):
    row_pattern = cls.SINGLENODE_STATUS_PATTERN
    reads_tokens_group = False
  elif cls.MULTINODE_HEADER_PATTERN.search(output):
    row_pattern = cls.MULTINODE_STATUS_PATTERN
    reads_tokens_group = True
  else:
    raise NodetoolStatusError(
      '`{}` output does not contain expected header. Actual output:\n{}'
      .format(NODETOOL_STATUS_COMMAND, output)
    )

  nodes = []
  shown_nodes = set()
  for match in row_pattern.finditer(output):
    address = match.group('address')
    tokens_num = match.group('tokens_num') if reads_tokens_group else 1
    node_stats = CassandraNodeStats(
      address=address,
      status=cls.STATUSES[match.group('status')],
      state=cls.STATES[match.group('state')],
      # The load figure is scaled by the multiplier registered for its unit.
      load=int(float(match.group('load'))
               * cls.SIZE_UNITS[match.group('size_unit')]),
      owns_pct=float(match.group('owns_pct')),
      tokens_num=int(tokens_num),
      host_id=match.group('host_id'),
      rack=match.group('rack'),
    )
    nodes.append(node_stats)
    shown_nodes.add(address)

  snapshot = CassandraStatsSnapshot(
    utc_timestamp=int(time.time()),
    nodes=nodes,
    # Known machines that nodetool did not list, and listed addresses that
    # are not known machines.
    missing_nodes=list(known_db_nodes - shown_nodes),
    unknown_nodes=list(shown_nodes - known_db_nodes)
  )
  logger.info('Prepared Cassandra nodes status in '
              '{elapsed:.2f}s.'.format(elapsed=time.time()-start))
  return snapshot
def prime_cassandra(replication):
  """ Prepares Cassandra: creates the keyspace and the initial tables, then
  records the datastore metadata entries.

  Args:
    replication: An integer giving the replication factor of the keyspace.
  Raises:
    TypeError if replication is not an integer.
    AppScaleBadArg if replication is not greater than 0.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) <= 0:
    raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

  hosts = appscale_info.get_db_ips()

  cluster = None
  session = None
  # Keep trying to connect; the error is re-raised once more than
  # INITIAL_CONNECT_RETRIES attempts have failed.
  failed_attempts = 0
  while True:
    try:
      cluster = Cluster(hosts)
      session = cluster.connect()
    except cassandra.cluster.NoHostAvailable as connection_error:
      failed_attempts += 1
      if failed_attempts > INITIAL_CONNECT_RETRIES:
        raise connection_error
      time.sleep(3)
    else:
      break

  session.default_consistency_level = ConsistencyLevel.QUORUM

  keyspace_cql = (
    ' CREATE KEYSPACE IF NOT EXISTS "{keyspace}" '
    'WITH REPLICATION = %(replication)s '
  ).format(keyspace=KEYSPACE)
  replication_options = {'class': 'SimpleStrategy',
                         'replication_factor': replication}
  session.execute(keyspace_cql, {'replication': replication_options},
                  timeout=SCHEMA_CHANGE_TIMEOUT)
  session.set_keyspace(KEYSPACE)

  for table in dbconstants.INITIAL_TABLES:
    table_cql = (
      ' CREATE TABLE IF NOT EXISTS "{table}" ( '
      '{key} blob, {column} text, {value} blob, '
      'PRIMARY KEY ({key}, {column}) '
      ') WITH COMPACT STORAGE '
    ).format(table=table,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)
    statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)

    logging.info('Trying to create {}'.format(table))
    try:
      session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
      logging.warning(
        'Encountered an operation timeout while creating {} table. Waiting {} '
        'seconds for schema to settle.'.format(table, SCHEMA_CHANGE_TIMEOUT))
      # Pause before propagating the timeout to the caller.
      time.sleep(SCHEMA_CHANGE_TIMEOUT)
      raise

  create_batch_tables(cluster, session)
  create_groups_table(session)
  create_transactions_table(session)
  create_pull_queue_tables(cluster, session)
  create_entity_ids_table(session)

  # Checked before the metadata writes below: is there at least one row in
  # the entity table?
  entity_query = 'SELECT * FROM "{}" LIMIT 1'.format(
    dbconstants.APP_ENTITY_TABLE)
  existing_entities = len(list(session.execute(entity_query))) == 1

  define_ua_schema(session)

  metadata_insert = (
    ' INSERT INTO "{table}" ({key}, {column}, {value}) '
    'VALUES (%(key)s, %(column)s, %(value)s) '
  ).format(table=dbconstants.DATASTORE_METADATA_TABLE,
           key=ThriftColumn.KEY,
           column=ThriftColumn.COLUMN_NAME,
           value=ThriftColumn.VALUE)

  def write_metadata(metadata_key, metadata_value):
    """ Inserts one row into the metadata table; the same identifier is used
    for the row key and the column name. """
    session.execute(metadata_insert, {'key': bytearray(metadata_key),
                                      'column': metadata_key,
                                      'value': bytearray(metadata_value)})

  if not existing_entities:
    write_metadata(cassandra_interface.VERSION_INFO_KEY,
                   str(POST_JOURNAL_VERSION))

    # Mark the newly created indexes as clean.
    write_metadata(cassandra_interface.INDEX_STATE_KEY,
                   str(IndexStates.CLEAN))

  # Indicate that the database has been successfully primed.
  write_metadata(cassandra_interface.PRIMED_KEY, 'true')
  logging.info('Cassandra is primed.')
def prime_cassandra(replication):
  """ Prepares Cassandra: creates the keyspace and the initial tables, runs
  the composite index metadata migration and records the datastore metadata
  entries.

  Args:
    replication: An integer giving the replication factor of the keyspace.
  Raises:
    TypeError if replication is not an integer.
    AppScaleBadArg if replication is not greater than 0.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) <= 0:
    raise dbconstants.AppScaleBadArg(
      'Replication must be greater than zero')

  zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips())
  zk_client.start()

  hosts = appscale_info.get_db_ips()

  # Keep trying to connect; the error is re-raised once more than
  # INITIAL_CONNECT_RETRIES attempts have failed.
  failed_attempts = 0
  while True:
    try:
      cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
      session = cluster.connect()
    except cassandra.cluster.NoHostAvailable as connection_error:
      failed_attempts += 1
      if failed_attempts > INITIAL_CONNECT_RETRIES:
        raise connection_error
      time.sleep(3)
    else:
      break

  session.default_consistency_level = ConsistencyLevel.QUORUM

  keyspace_cql = (
    ' CREATE KEYSPACE IF NOT EXISTS "{keyspace}" '
    'WITH REPLICATION = %(replication)s '
  ).format(keyspace=KEYSPACE)
  replication_options = {
    'class': 'SimpleStrategy',
    'replication_factor': replication
  }
  session.execute(keyspace_cql, {'replication': replication_options},
                  timeout=SCHEMA_CHANGE_TIMEOUT)
  session.set_keyspace(KEYSPACE)

  logger.info('Waiting for all hosts to be connected')
  deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
  # Poll once a second until the pool state has an entry per host. The else
  # branch runs only when the deadline passes without that happening.
  while time.time() <= deadline:
    if len(session.get_pool_state()) == len(hosts):
      break
    time.sleep(1)
  else:
    logger.warning(
      'Timeout when waiting for hosts to join. Continuing '
      'with connected hosts.')

  for table in dbconstants.INITIAL_TABLES:
    table_cql = (
      ' CREATE TABLE IF NOT EXISTS "{table}" ( '
      '{key} blob, {column} text, {value} blob, '
      'PRIMARY KEY ({key}, {column}) '
      ') WITH COMPACT STORAGE '
    ).format(table=table,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)
    statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)

    logger.info('Trying to create {}'.format(table))
    try:
      session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
      logger.warning(
        'Encountered an operation timeout while creating {} table. Waiting {} '
        'seconds for schema to settle.'.format(table, SCHEMA_CHANGE_TIMEOUT))
      # Pause before propagating the timeout to the caller.
      time.sleep(SCHEMA_CHANGE_TIMEOUT)
      raise

  migrate_composite_index_metadata(cluster, session, zk_client)
  create_batch_tables(cluster, session)
  create_groups_table(session)
  create_transactions_table(session)
  create_entity_ids_table(session)

  # Checked before the metadata writes below: is there at least one row in
  # the entity table?
  entity_query = 'SELECT * FROM "{}" LIMIT 1'.format(
    dbconstants.APP_ENTITY_TABLE)
  existing_entities = len(list(session.execute(entity_query))) == 1

  define_ua_schema(session)

  metadata_insert = (
    ' INSERT INTO "{table}" ({key}, {column}, {value}) '
    'VALUES (%(key)s, %(column)s, %(value)s) '
  ).format(table=dbconstants.DATASTORE_METADATA_TABLE,
           key=ThriftColumn.KEY,
           column=ThriftColumn.COLUMN_NAME,
           value=ThriftColumn.VALUE)

  def write_metadata(metadata_key, metadata_value):
    """ Inserts one row into the metadata table; the same identifier is used
    for the row key and the column name. """
    session.execute(metadata_insert, {'key': bytearray(metadata_key),
                                      'column': metadata_key,
                                      'value': bytearray(metadata_value)})

  if existing_entities:
    current_version = current_datastore_version(session)
    if current_version == 1.0:
      # Instruct the groomer to reclean the indexes.
      write_metadata(cassandra_interface.INDEX_STATE_KEY,
                     str(IndexStates.DIRTY))
      write_metadata(cassandra_interface.VERSION_INFO_KEY,
                     str(CURRENT_VERSION))
  else:
    write_metadata(cassandra_interface.VERSION_INFO_KEY,
                   str(CURRENT_VERSION))

    # Mark the newly created indexes as clean.
    write_metadata(cassandra_interface.INDEX_STATE_KEY,
                   str(IndexStates.CLEAN))

    # Indicate that scatter property values do not need to be populated.
    write_metadata(cassandra_interface.SCATTER_PROP_KEY,
                   ScatterPropStates.POPULATED)

  # Indicate that the database has been successfully primed.
  write_metadata(cassandra_interface.PRIMED_KEY, str(CURRENT_VERSION))
  logger.info('Cassandra is primed.')
def get_current(cls):
  """ Retrieves Cassandra status info.

  This is a generator that yields Tornado futures and hands its result back
  through gen.Return.

  Returns:
    An instance of CassandraStatsSnapshot.
  Raises:
    NodetoolStatusError if the status command exits with a non-zero code or
    its output contains neither of the expected headers.
  """
  start = time.time()
  try:
    proc = process.Subprocess(NODETOOL_STATUS_COMMAND,
                              stdout=process.Subprocess.STREAM,
                              stderr=process.Subprocess.STREAM)
    # Drain both pipes together: reading them one after the other can stall
    # if the child fills the stderr pipe before it closes stdout.
    output, err = yield [proc.stdout.read_until_close(),
                         proc.stderr.read_until_close()]
    if err:
      logger.error(err)
    # Reading the pipes never raises CalledProcessError; waiting for the exit
    # status is what turns a failed command into an error.
    # NOTE(review): wait_for_exit sets up Tornado's SIGCHLD handling on first
    # use - confirm this runs on the main thread's IOLoop.
    yield proc.wait_for_exit()
  except process.CalledProcessError as exit_error:
    raise NodetoolStatusError(exit_error)

  known_db_nodes = set(appscale_info.get_db_ips())

  # The header decides which row pattern applies. Rows matched by the
  # single-node pattern are always reported with one token; the multi-node
  # pattern provides a 'tokens_num' group.
  if cls.SINGLENODE_HEADER_PATTERN.search(output):
    row_pattern = cls.SINGLENODE_STATUS_PATTERN
    reads_tokens_group = False
  elif cls.MULTINODE_HEADER_PATTERN.search(output):
    row_pattern = cls.MULTINODE_STATUS_PATTERN
    reads_tokens_group = True
  else:
    raise NodetoolStatusError(
      '`nodetool status` output does not contain expected header')

  nodes = []
  shown_nodes = set()
  for match in row_pattern.finditer(output):
    address = match.group('address')
    tokens_num = match.group('tokens_num') if reads_tokens_group else 1
    node_stats = CassandraNodeStats(
      address=address,
      status=cls.STATUSES[match.group('status')],
      state=cls.STATES[match.group('state')],
      # The load figure is scaled by the multiplier registered for its unit.
      load=int(float(match.group('load'))
               * cls.SIZE_UNITS[match.group('size_unit')]),
      owns_pct=float(match.group('owns_pct')),
      tokens_num=int(tokens_num),
      host_id=match.group('host_id'),
      rack=match.group('rack'),
    )
    nodes.append(node_stats)
    shown_nodes.add(address)

  snapshot = CassandraStatsSnapshot(
    utc_timestamp=int(time.time()),
    nodes=nodes,
    # Known machines that nodetool did not list, and listed addresses that
    # are not known machines.
    missing_nodes=list(known_db_nodes - shown_nodes),
    unknown_nodes=list(shown_nodes - known_db_nodes))
  logger.info('Prepared Cassandra nodes status in '
              '{elapsed:.1f}s.'.format(elapsed=time.time() - start))
  raise gen.Return(snapshot)
def get_current(cls):
  """ Retrieves Cassandra status info.

  This is a generator that yields Tornado futures and hands its result back
  through gen.Return.

  Returns:
    An instance of CassandraStatsSnapshot.
  Raises:
    NodetoolStatusError if the status command exits with a non-zero code or
    its output contains neither of the expected headers.
  """
  start = time.time()
  try:
    proc = process.Subprocess(
      NODETOOL_STATUS_COMMAND,
      stdout=process.Subprocess.STREAM,
      stderr=process.Subprocess.STREAM
    )
    # Drain both pipes together: reading them one after the other can stall
    # if the child fills the stderr pipe before it closes stdout.
    output, err = yield [
      proc.stdout.read_until_close(),
      proc.stderr.read_until_close()
    ]
    if err:
      logger.error(err)
    # Reading the pipes never raises CalledProcessError; waiting for the exit
    # status is what turns a failed command into an error.
    # NOTE(review): wait_for_exit sets up Tornado's SIGCHLD handling on first
    # use - confirm this runs on the main thread's IOLoop.
    yield proc.wait_for_exit()
  except process.CalledProcessError as exit_error:
    raise NodetoolStatusError(exit_error)

  known_db_nodes = set(appscale_info.get_db_ips())

  # The header decides which row pattern applies. Rows matched by the
  # single-node pattern are always reported with one token; the multi-node
  # pattern provides a 'tokens_num' group.
  if cls.SINGLENODE_HEADER_PATTERN.search(output):
    row_pattern = cls.SINGLENODE_STATUS_PATTERN
    reads_tokens_group = False
  elif cls.MULTINODE_HEADER_PATTERN.search(output):
    row_pattern = cls.MULTINODE_STATUS_PATTERN
    reads_tokens_group = True
  else:
    raise NodetoolStatusError(
      '`nodetool status` output does not contain expected header'
    )

  nodes = []
  shown_nodes = set()
  for match in row_pattern.finditer(output):
    address = match.group('address')
    tokens_num = match.group('tokens_num') if reads_tokens_group else 1
    node_stats = CassandraNodeStats(
      address=address,
      status=cls.STATUSES[match.group('status')],
      state=cls.STATES[match.group('state')],
      # The load figure is scaled by the multiplier registered for its unit.
      load=int(float(match.group('load'))
               * cls.SIZE_UNITS[match.group('size_unit')]),
      owns_pct=float(match.group('owns_pct')),
      tokens_num=int(tokens_num),
      host_id=match.group('host_id'),
      rack=match.group('rack'),
    )
    nodes.append(node_stats)
    shown_nodes.add(address)

  snapshot = CassandraStatsSnapshot(
    utc_timestamp=int(time.time()),
    nodes=nodes,
    # Known machines that nodetool did not list, and listed addresses that
    # are not known machines.
    missing_nodes=list(known_db_nodes - shown_nodes),
    unknown_nodes=list(shown_nodes - known_db_nodes)
  )
  logger.info('Prepared Cassandra nodes status in '
              '{elapsed:.1f}s.'.format(elapsed=time.time()-start))
  raise gen.Return(snapshot)
def restore_data(path, keyname, force=False):
  """ Restores the Cassandra backup.

  Cassandra is stopped on every DB machine, its data files are cleared, the
  backup archive is unpacked where one is present, and Cassandra is started
  again.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
    force: A boolean. When false and some machines lack a restore file, the
      user is asked to confirm before anything is changed.
  Raises:
    BRException if unable to find any Cassandra machines or if Cassandra
    cannot be stopped or started on a machine.
  """
  logging.info("Starting new db restore.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  # A failing `ls` marks the machine as having no archive at the given path.
  machines_without_restore = []
  for db_ip in db_ips:
    exit_code = utils.ssh(db_ip, keyname, 'ls {}'.format(path),
                          method=subprocess.call)
    if exit_code != ExitCodes.SUCCESS:
      machines_without_restore.append(db_ip)

  if machines_without_restore and not force:
    logging.info(
      'The following machines do not have a restore file: {}'.format(
        machines_without_restore))
    response = raw_input('Would you like to continue? [y/N] ')
    if response not in ['Y', 'y']:
      return

  for db_ip in db_ips:
    logging.info('Stopping Cassandra on {}'.format(db_ip))
    summary = utils.ssh(db_ip, keyname, 'monit summary',
                        method=subprocess.check_output)
    status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
    retries = SERVICE_RETRIES
    while status != MonitStates.UNMONITORED:
      utils.ssh(db_ip, keyname,
                'monit stop {}'.format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
      time.sleep(3)
      summary = utils.ssh(db_ip, keyname, 'monit summary',
                          method=subprocess.check_output)
      status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
      retries -= 1
      # Give up only if the last permitted attempt still left the service in
      # the wrong state; a success on that attempt must not raise.
      if retries < 0 and status != MonitStates.UNMONITORED:
        raise BRException('Unable to stop Cassandra')

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  for db_ip in db_ips:
    logging.info('Restoring Cassandra data on {}'.format(db_ip))
    # Raw string: the backslashes belong to the find/shell syntax.
    clear_db = r'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
      format(cassandra_dir)
    utils.ssh(db_ip, keyname, clear_db)

    if db_ip not in machines_without_restore:
      utils.ssh(db_ip, keyname,
                'tar xf {} -C {}'.format(path, cassandra_dir))
      utils.ssh(db_ip, keyname,
                'chown -R cassandra {}'.format(cassandra_dir))

    logging.info('Starting Cassandra on {}'.format(db_ip))
    retries = SERVICE_RETRIES
    # Starting from UNMONITORED means a start command is always issued at
    # least once.
    status = MonitStates.UNMONITORED
    while status != MonitStates.RUNNING:
      utils.ssh(db_ip, keyname,
                'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME),
                method=subprocess.call)
      time.sleep(3)
      summary = utils.ssh(db_ip, keyname, 'monit summary',
                          method=subprocess.check_output)
      status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
      retries -= 1
      # Same rule as the stop loop: a success on the final attempt counts.
      if retries < 0 and status != MonitStates.RUNNING:
        raise BRException('Unable to start Cassandra')

    utils.ssh(db_ip, keyname,
              'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME))

  logging.info("Done with db restore.")