def test_yaml_token_enforcement_with_tokens_and_autobootstrap(self):
    """Enforcing tokens on a yaml that has tokens and auto_bootstrap rewrites all three settings."""
    # Fix: the previous version wrapped this copy in a pointless
    # `with open('...cassandra_with_tokens.yaml') as f:` that opened an
    # unrelated file read-only and never used the handle.
    shutil.copyfile('tests/resources/yaml/original/cassandra_with_tokens_and_autobootstrap.yaml',
                    'tests/resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml')
    config = configparser.ConfigParser(interpolation=None)
    config['cassandra'] = {
        'config_file': os.path.join(os.path.dirname(__file__),
                                    'resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml'),
        'start_cmd': '/etc/init.d/cassandra start',
        'stop_cmd': '/etc/init.d/cassandra stop',
        'is_ccm': '1'
    }
    medusa_config = MedusaConfig(
        storage=None,
        monitoring=None,
        cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
        ssh=None,
        checks=None,
        logging=None
    )
    cassandra = Cassandra(medusa_config.cassandra)
    tokens = ['1', '2', '3']
    cassandra.replaceTokensInCassandraYamlAndDisableBootstrap(tokens)
    # Re-read the patched yaml and verify the token enforcement took effect.
    with open('tests/resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml', 'r') as f:
        modified_yaml = yaml.load(f, Loader=yaml.BaseLoader)
    self.assertEqual(modified_yaml.get('num_tokens'), '3')
    self.assertEqual(modified_yaml.get('initial_token'), '1,2,3')
    self.assertEqual(modified_yaml.get('auto_bootstrap'), 'false')
def test_is_cass_default_ports_valid(self):
    """Default ports must be reported for C* versions 2, 3 and 4 when ccm is not used."""
    version_to_yaml = {
        "2": 'resources/yaml/original/default-c2.yaml',
        "3": 'resources/yaml/original/default-c3.yaml',
        "4": 'resources/yaml/original/default-c4.yaml',
    }
    for release_version, yaml_file in version_to_yaml.items():
        # Not using ccm, directing check for cassandra health.
        medusa_config = self.get_simple_medusa_config(is_ccm_active="0", yaml_file=yaml_file)
        node = Cassandra(medusa_config, release_version=release_version)
        assert node.rpc_port == 9160
        assert node.native_port == 9042
        assert node.storage_port == 7000
def build_indices(config, noop):
    """
    One-off function to construct the backup index.
    This function lists all complete cluster backups and all node backups in them.
    For all node backups found this way, it will find the latest one per node
    and update index accordingly.
    """
    try:
        storage = medusa.storage.Storage(config=config.storage)
        is_ccm = int(shlex.split(config.cassandra.is_ccm)[0])
        discovered_backups = []
        if is_ccm != 1:
            # Real cluster: walk the token map to discover backups per node.
            cassandra = Cassandra(config.cassandra)
            with cassandra.new_session() as cql_session:
                tokenmap = cql_session.tokenmap()
            for fqdn in tokenmap.keys():
                logging.info("processing {}".format(fqdn))
                discovered_backups.extend(storage.discover_node_backups(fqdn=fqdn))
        else:
            discovered_backups = list(storage.discover_node_backups())

        latest_node_backups = {}
        if noop:
            logging.info('--noop was set, will only print the indices')

        for node_backup in discovered_backups:
            # Only complete backups (with a finish timestamp) are indexed.
            if node_backup.finished is None:
                continue
            # Track the most recent finished backup seen so far for this node.
            best_so_far = latest_node_backups.get(node_backup.fqdn, node_backup)
            if node_backup.finished >= best_so_far.finished:
                latest_node_backups[node_backup.fqdn] = node_backup
            logging.debug('Found backup {} from {}'.format(
                node_backup.name, node_backup.fqdn))
            # Unless --noop was requested, record the backup in the index.
            if not noop:
                add_backup_start_to_index(storage, node_backup)
                add_backup_finish_to_index(storage, node_backup)

        # After seeing every backup, publish the latest one per node.
        for fqdn, node_backup in latest_node_backups.items():
            logging.debug('Latest backup {} is {}'.format(
                fqdn, node_backup.name))
            if not noop:
                set_latest_backup_in_index(storage, node_backup)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
def test_cassandra_start(self, subproc_mock, replace_tokens_mock):
    """Starting Cassandra rewrites tokens in cassandra.yaml only when ccm is not active."""
    subproc_mock.return_value = None
    replace_tokens_mock.return_value = None
    subproc_mock.call_count = 0
    replace_tokens_mock.call_count = 0
    medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="0",
                                                     yaml_file='resources/yaml/original/default-c4.yaml')
    cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")
    cassandra_v4.start(['test-token'])
    # When start is invoked with ccm NOT active (is_ccm_active="0"), expect both
    # check_output and replace tokens to be invoked.
    assert subproc_mock.call_count == 1 and replace_tokens_mock.call_count == 1
    # And, when start invoked with ccm active, expect that only check_output is called
    # as there is no need to replace tokens in c* yaml and disabling bootstrap.
    subproc_mock.call_count = 0
    replace_tokens_mock.call_count = 0
    medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="1",
                                                     yaml_file='resources/yaml/original/default-c4.yaml')
    cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")
    cassandra_v4.start(['test-token'])
    assert subproc_mock.call_count == 1 and replace_tokens_mock.call_count == 0
def test_cassandra_client_encryption_enabled_default_port(self):
    """Client encryption enabled with no native ports assigned: defaults are used."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/cassandra-client-encrypt-default.yaml'))
    # Neither native port is assigned in the yaml, so the defaults apply.
    self.assertEqual(node.native_port, 9142)
    self.assertEqual(node.rpc_port, 9160)
    self.assertEqual(node.storage_port, 7000)
def test_parsing_custom_seed_provider(self):
    """A non-default seed provider class must result in an empty seed list."""
    # Patch a sample yaml so it declares a custom seed provider.
    with open('tests/resources/yaml/original/cassandra_with_tokens.yaml', 'r') as src:
        yaml_dict = yaml.load(src, Loader=yaml.FullLoader)
    yaml_dict['seed_provider'] = [{'class_name': 'org.foo.bar.CustomSeedProvider'}]
    with open(
            'tests/resources/yaml/work/cassandra_with_custom_seedprovider.yaml', 'w') as dst:
        yaml.safe_dump(yaml_dict, dst)
    # Pass the patched yaml to the cassandra config.
    parser = configparser.ConfigParser(interpolation=None)
    parser['cassandra'] = {
        'config_file': os.path.join(
            os.path.dirname(__file__),
            'resources/yaml/work/cassandra_with_custom_seedprovider.yaml'),
        'start_cmd': '/etc/init.d/cassandra start',
        'stop_cmd': '/etc/init.d/cassandra stop',
        'is_ccm': '1'
    }
    cassandra_config = _namedtuple_from_dict(CassandraConfig, parser['cassandra'])
    # Init cassandra and check that the custom seed provider was ignored.
    node = Cassandra(cassandra_config)
    self.assertEqual([], sorted(node.seeds))
def __init__(self, config, backup_name, stagger, mode, temp_dir, parallel_snapshots, parallel_uploads): self.id = uuid.uuid4() # TODO expose the argument below (Note that min(1000, <number_of_hosts>) will be used) self.orchestration_snapshots = Orchestration(config, parallel_snapshots) self.orchestration_uploads = Orchestration(config, parallel_uploads) self.config = config self.backup_name = backup_name self.stagger = stagger self.mode = mode self.temp_dir = temp_dir self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id) self.hosts = {} self.cassandra = Cassandra(config) self.snapshot_tag = '{}{}'.format(self.cassandra.SNAPSHOT_PREFIX, self.backup_name) fqdn_resolver = medusa.config.evaluate_boolean(self.config.cassandra.resolve_ip_addresses) self.fqdn_resolver = HostnameResolver(fqdn_resolver)
def test_seed_parsing(self):
    """Seeds declared in cassandra.yaml are parsed into the expected sorted list."""
    shutil.copyfile('tests/resources/yaml/original/cassandra_with_tokens_and_autobootstrap.yaml',
                    'tests/resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml')
    parser = configparser.ConfigParser(interpolation=None)
    parser['cassandra'] = {
        'config_file': os.path.join(os.path.dirname(__file__),
                                    'resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml'),
        'start_cmd': '/etc/init.d/cassandra start',
        'stop_cmd': '/etc/init.d/cassandra stop',
        'is_ccm': '1'
    }
    parser["grpc"] = {"enabled": "0"}
    parser['kubernetes'] = {"enabled": "0"}
    medusa_config = MedusaConfig(
        file_path=None,
        storage=None,
        monitoring=None,
        cassandra=_namedtuple_from_dict(CassandraConfig, parser['cassandra']),
        ssh=None,
        checks=None,
        logging=None,
        grpc=_namedtuple_from_dict(GrpcConfig, parser["grpc"]),
        kubernetes=_namedtuple_from_dict(KubernetesConfig, parser['kubernetes']),
    )
    node = Cassandra(medusa_config)
    self.assertEqual(["127.0.0.1", "127.0.0.2"], sorted(node.seeds))
def test_cassandra_client_encryption_enabled_reuse_port(self):
    """Client encryption without native_transport_port_ssl reuses native_transport_port."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/cassandra-client-encrypt.yaml'))
    # With client_encryption_options enabled and no native_transport_port_ssl
    # defined, the plain native_transport_port value is used as-is.
    self.assertEqual(node.native_port, 9123)
    self.assertEqual(node.rpc_port, 9160)
    self.assertEqual(node.storage_port, 7000)
def test_cassandra_internode_encryption_v4_ports(self):
    """Internode encryption on a v4 node: ssl_storage_port wins over storage_port."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/'
                                      'cassandra-internode-encrypt.yaml'),
        release_version="4")
    # The ssl_storage_port value from the yaml is expected.
    self.assertEqual(node.storage_port, 10001)
    self.assertEqual(node.native_port, 9042)
    self.assertEqual(node.rpc_port, 9160)
def test_cassandra_client_encryption_enabled_ssl_port(self):
    """With both native ports defined, the dedicated SSL port takes precedence."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/cassandra-client-encrypt-sslport.yaml'))
    # server_encryption_options/internode set to 'all', with both
    # native_transport_port_ssl AND native_transport_port defined:
    # native_transport_port_ssl is expected to be used.
    self.assertEqual(node.storage_port, 7001)
    self.assertEqual(node.native_port, 18675)
    self.assertEqual(node.rpc_port, 9160)
def test_cassandra_non_encrypt_comm_ports(self):
    """No encryption: storage_port is used even when ssl_storage_port is also set."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/cassandra-no-encrypt.yaml'),
        release_version="4")
    # Without encryption the plain storage_port value is kept, despite the
    # yaml also providing an ssl_storage_port.
    self.assertEqual(node.storage_port, 15000)
    self.assertEqual(node.native_port, 9777)
    self.assertEqual(node.rpc_port, 9160)
def test_cassandra_internode_encrypt_v4_default_ports(self):
    """Internode encryption desired but no storage ports defined: v4 defaults to 7001."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/'
                                      'cassandra-internode-encrypt-default.yaml'),
        release_version="4")
    # Secure connection desired while neither ssl_storage_port nor storage_port
    # is defined; on release version 4 this defaults to 7001.
    self.assertEqual(node.storage_port, 7001)
    self.assertEqual(node.native_port, 9042)
    self.assertEqual(node.rpc_port, 9160)
def test_is_cassandra_v2_healthy(self, fm):
    """A v2 node whose port probes all succeed reports healthy (three probes expected)."""
    fm.return_value = True
    host = Mock()
    # Not using ccm, directing check for cassandra health.
    medusa_config = self.get_simple_medusa_config(is_ccm_active="0",
                                                  yaml_file='resources/yaml/original/default-c2.yaml')
    node = Cassandra(medusa_config, release_version="2")
    # When c* version 2 is used, check for the port values.
    self.assertTrue(is_cassandra_healthy("all", node, host))
    assert fm.call_count == 3
def test_cassandra_internode_encrypt_nossl_v4_default_ports(self):
    """Internode encryption on v4 with no ssl_storage_port: storage_port is used."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/'
                                      'cassandra-internode-encrypt-nossl-default.yaml'),
        release_version="4")
    # Secure connection desired but ssl_storage_port is not defined; on c* v4
    # the explicitly specified storage_port value is used instead.
    self.assertEqual(node.storage_port, 8675)
    self.assertEqual(node.native_port, 9042)
    self.assertEqual(node.rpc_port, 9160)
def test_is_cassandra_healthy_invalid_input(self):
    """Invalid cassandra or host arguments must report unhealthy, not raise."""
    host = Mock()
    # Not using ccm, directing check for cassandra health.
    medusa_config = self.get_simple_medusa_config(is_ccm_active="0",
                                                  yaml_file='resources/yaml/original/default-c4.yaml')
    node = Cassandra(medusa_config, release_version="4")
    # invalid cassandra input
    self.assertFalse(is_cassandra_healthy("all", {}, host))
    # invalid host input
    self.assertFalse(is_cassandra_healthy("all", node, {}))
def test_is_cassandra_v3_healthy(self, fm):
    """A v3 node whose port probes all succeed reports healthy (three probes expected)."""
    fm.return_value = True
    host = Mock()
    # Not using ccm, directing check for cassandra health.
    medusa_config = self.get_simple_medusa_config(is_ccm_active="0",
                                                  yaml_file='resources/yaml/original/default-c3.yaml')
    node = Cassandra(medusa_config, release_version="3")
    # When c* version 3 is used, check for the port values.
    self.assertTrue(is_cassandra_healthy("all", node, host))
    assert fm.call_count == 3
def handle_backup(config, backup_name_arg, stagger_time, enable_md5_checks_flag, mode):
    """Run a node backup end to end, keeping BackupMan's status in sync.

    :param config: full Medusa configuration object
    :param backup_name_arg: explicit backup name; falls back to a timestamp when falsy
    :param stagger_time: stagger setting forwarded to start_backup
    :param enable_md5_checks_flag: forwarded to start_backup
    :param mode: "differential" enables differential mode; any other value means full
    :return: tuple of backup result info extracted from start_backup's dict
    :raises: delegates failures to medusa.utils.handle_exception after flagging
             the backup as failed and emitting a monitoring event
    """
    start = datetime.datetime.now()
    # Default backup name is a minute-resolution timestamp.
    backup_name = backup_name_arg or start.strftime('%Y%m%d%H%M')
    monitoring = Monitoring(config=config.monitoring)
    try:
        logging.debug("Starting backup preparations with Mode: {}".format(mode))
        storage = Storage(config=config.storage)
        cassandra = Cassandra(config)
        differential_mode = False
        if mode == "differential":
            differential_mode = True
        node_backup = storage.get_node_backup(
            fqdn=config.storage.fqdn,
            name=backup_name,
            differential_mode=differential_mode)
        # Refuse to overwrite an existing backup with the same name.
        if node_backup.exists():
            raise IOError('Error: Backup {} already exists'.format(backup_name))
        # Starting the backup
        logging.info("Starting backup using Stagger: {} Mode: {} Name: {}".format(
            stagger_time, mode, backup_name))
        BackupMan.update_backup_status(backup_name, BackupMan.STATUS_IN_PROGRESS)
        info = start_backup(storage, node_backup, cassandra, differential_mode, stagger_time, start, mode,
                            enable_md5_checks_flag, backup_name, config, monitoring)
        BackupMan.update_backup_status(backup_name, BackupMan.STATUS_SUCCESS)
        logging.debug("Done with backup, returning backup result information")
        return (info["actual_backup_duration"], info["actual_start_time"], info["end_time"],
                info["node_backup"], info["node_backup_cache"], info["num_files"],
                info["start_time"], info["backup_name"])
    except Exception as e:
        logging.error("Issue occurred inside handle_backup Name: {} Error: {}".format(
            backup_name, str(e)))
        # Mark the backup failed and emit a monitoring event before delegating.
        BackupMan.update_backup_status(backup_name, BackupMan.STATUS_FAILED)
        tags = ['medusa-node-backup', 'backup-error', backup_name]
        monitoring.send(tags, 1)
        medusa.utils.handle_exception(
            e,
            "Error occurred during backup: {}".format(str(e)),
            config)
def test_yaml_token_enforcement_no_tokens(self):
    """Enforcing tokens on a yaml without tokens writes num_tokens, initial_token and auto_bootstrap."""
    # Fix: the previous version wrapped this copy in a pointless
    # `with open(...) as f:` that opened the source yaml read-only and never
    # used the handle; shutil.copyfile is all that is needed.
    shutil.copyfile('tests/resources/yaml/original/cassandra_no_tokens.yaml',
                    'tests/resources/yaml/work/cassandra_no_tokens.yaml')
    config = configparser.ConfigParser(interpolation=None)
    config['cassandra'] = {
        'config_file': os.path.join(os.path.dirname(__file__),
                                    'resources/yaml/work/cassandra_no_tokens.yaml'),
        'start_cmd': '/etc/init.d/cassandra start',
        'stop_cmd': '/etc/init.d/cassandra stop',
        'is_ccm': '1'
    }
    config["grpc"] = {
        "enabled": "0"
    }
    config['kubernetes'] = {
        "enabled": "0"
    }
    medusa_config = MedusaConfig(
        file_path=None,
        storage=None,
        monitoring=None,
        cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
        ssh=None,
        checks=None,
        logging=None,
        grpc=_namedtuple_from_dict(GrpcConfig, config['grpc']),
        kubernetes=_namedtuple_from_dict(KubernetesConfig, config['kubernetes']),
    )
    cassandra = Cassandra(medusa_config)
    tokens = ['1', '2', '3']
    cassandra.replace_tokens_in_cassandra_yaml_and_disable_bootstrap(tokens)
    # Re-read the patched yaml and verify the token enforcement took effect.
    with open('tests/resources/yaml/work/cassandra_no_tokens.yaml', 'r') as f:
        modified_yaml = yaml.load(f, Loader=yaml.BaseLoader)
    self.assertEqual(modified_yaml.get('num_tokens'), '3')
    self.assertEqual(modified_yaml.get('initial_token'), '1,2,3')
    self.assertEqual(modified_yaml.get('auto_bootstrap'), 'false')
def test_is_cassandra_healthy_check_type_unknown(self):
    """Unrecognized check type: health follows the mocked port-probe result."""
    host = Mock()
    # Not using ccm, directing check for cassandra health.
    medusa_config = self.get_simple_medusa_config(is_ccm_active="0",
                                                  yaml_file='resources/yaml/original/default-c4.yaml')
    node = Cassandra(medusa_config, release_version="4")
    is_open.return_value = True
    self.assertTrue(is_cassandra_healthy("not-thrift-and-not-all-check", node, host))
    assert is_open.call_count == 1
    is_open.call_count = 0
    is_open.return_value = False
    self.assertFalse(is_cassandra_healthy("not-thrift-and-not-all-check", node, host))
    assert is_open.call_count == 2
def test_is_cassandra_healthy_check_types(self, fm):
    """All accepted check-type strings report healthy when the port probe succeeds."""
    host = Mock()
    # Not using ccm, directing check for cassandra health.
    medusa_config = self.get_simple_medusa_config(is_ccm_active="0",
                                                  yaml_file='resources/yaml/original/default-c4.yaml')
    node = Cassandra(medusa_config, release_version="4")
    # When c* version 4 is used, check for the port values.
    fm.return_value = True
    for check_type in ("all", "thrift", "unknown"):
        self.assertTrue(is_cassandra_healthy(check_type, node, host))
def test_parsing_custom_seed_provider(self):
    """A non-default seed provider class must result in an empty seed list."""
    # Patch a sample yaml so it declares a custom seed provider.
    with open('tests/resources/yaml/original/cassandra_with_tokens.yaml', 'r') as src:
        yaml_dict = yaml.load(src, Loader=yaml.FullLoader)
    yaml_dict['seed_provider'] = [{'class_name': 'org.foo.bar.CustomSeedProvider'}]
    with open(
            'tests/resources/yaml/work/cassandra_with_custom_seedprovider.yaml', 'w') as dst:
        yaml.safe_dump(yaml_dict, dst)
    # Pass the patched yaml to the cassandra config.
    parser = configparser.ConfigParser(interpolation=None)
    parser['cassandra'] = {
        'config_file': os.path.join(
            os.path.dirname(__file__),
            'resources/yaml/work/cassandra_with_custom_seedprovider.yaml'),
        'start_cmd': '/etc/init.d/cassandra start',
        'stop_cmd': '/etc/init.d/cassandra stop',
        'is_ccm': '1'
    }
    parser["grpc"] = {"enabled": "0"}
    parser['kubernetes'] = {"enabled": "0"}
    medusa_config = MedusaConfig(
        storage=None,
        monitoring=None,
        cassandra=_namedtuple_from_dict(CassandraConfig, parser['cassandra']),
        ssh=None,
        checks=None,
        logging=None,
        grpc=_namedtuple_from_dict(GrpcConfig, parser["grpc"]),
        kubernetes=_namedtuple_from_dict(KubernetesConfig, parser['kubernetes']),
    )
    # Init cassandra and check that the custom seed provider was ignored.
    node = Cassandra(medusa_config)
    self.assertEqual([], sorted(node.seeds))
def restore_node_sstableloader(config, temp_dir, backup_name, in_place, keep_auth, seeds, storage,
                               keyspaces, tables):
    """Restore a backup by streaming sstables with sstableloader, one source fqdn at a time.

    config.storage.fqdn may hold a comma-separated list of source nodes; each
    node's backup is downloaded and loaded in turn. The process exits with
    status 1 when a backup is missing and 0 when there is nothing to restore.

    :return: the node backup of the last processed fqdn (None if the fqdn list is empty)
    """
    cassandra = Cassandra(config)
    node_backup = None
    fqdns = config.storage.fqdn.split(",")
    for fqdn in fqdns:
        # Presence of the 'differential' marker blob decides the backup mode.
        differential_blob = storage.storage_driver.get_blob(
            os.path.join(fqdn, backup_name, 'meta', 'differential'))
        node_backup = storage.get_node_backup(
            fqdn=fqdn,
            name=backup_name,
            differential_mode=True if differential_blob is not None else False)
        if not node_backup.exists():
            logging.error('No such backup')
            sys.exit(1)
        fqtns_to_restore, ignored_fqtns = filter_fqtns(keyspaces, tables, node_backup.manifest)
        for fqtns in ignored_fqtns:
            logging.info('Skipping restore of {}'.format(fqtns))
        if len(fqtns_to_restore) == 0:
            logging.error('There is nothing to restore')
            sys.exit(0)
        # Download the backup
        download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
        logging.info('Downloading data from backup to {}'.format(download_dir))
        download_data(config.storage, node_backup, fqtns_to_restore, destination=download_dir)
        invoke_sstableloader(config, download_dir, keep_auth, fqtns_to_restore,
                             cassandra.storage_port)
        logging.info('Finished loading backup from {}'.format(fqdn))
        # Clean the restored data from local temporary folder
        clean_path(download_dir, keep_folder=False)
    return node_backup
def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
             pssh_pool_size, keyspaces=None, tables=None, bypass_checks=False, use_sstableloader=False):
    """Prepare a cluster restore job.

    :param cluster_backup: the cluster backup to restore
    :param config: full Medusa configuration object
    :param temp_dir: base directory for temporary files (assumed pathlib-like,
                     since `/` is used below -- TODO confirm at call sites)
    :param host_list: optional mapping file/list of backup/target hosts
    :param seed_target: seed node to contact for cluster topology
    :param keep_auth: whether to retain auth data on restore
    :param verify: whether to verify the restore
    :param pssh_pool_size: pssh parallelism for the restore
    :param keyspaces: keyspaces to restore; defaults to a fresh empty dict
    :param tables: tables to restore; defaults to a fresh empty dict
    :param bypass_checks: skip safety checks when True
    :param use_sstableloader: restore through sstableloader instead of file moves
    """
    self.id = uuid.uuid4()
    self.ringmap = None
    self.cluster_backup = cluster_backup
    self.session_provider = None
    self.config = config
    self.host_list = host_list
    self.seed_target = seed_target
    self.keep_auth = keep_auth
    self.verify = verify
    self.in_place = None
    self.temp_dir = temp_dir  # temporary files
    self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
    self.host_map = {}  # Map of backup host/target host for the restore process
    # Fix: the defaults used to be mutable dicts (`keyspaces={}, tables={}`),
    # which are shared across calls; use a None sentinel instead, matching the
    # sibling constructor's convention.
    self.keyspaces = {} if keyspaces is None else keyspaces
    self.tables = {} if tables is None else tables
    self.bypass_checks = bypass_checks
    self.use_sstableloader = use_sstableloader
    self.pssh_pool_size = pssh_pool_size
    self.cassandra = Cassandra(config.cassandra)
def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
             parallel_restores, keyspaces=None, tables=None, bypass_checks=False,
             use_sstableloader=False, version_target=None):
    """Prepare a cluster restore job.

    :param cluster_backup: the cluster backup to restore
    :param config: full Medusa configuration object
    :param temp_dir: base directory for temporary files (assumed pathlib-like,
                     since `/` is used below -- TODO confirm at call sites)
    :param host_list: optional mapping file/list of backup/target hosts
    :param seed_target: seed node to contact for cluster topology
    :param keep_auth: whether to retain auth data on restore
    :param verify: whether to verify the restore
    :param parallel_restores: parallelism for orchestration, also reused as pssh pool size
    :param keyspaces: keyspaces to restore; None is normalized to an empty dict
    :param tables: tables to restore; None is normalized to an empty dict
    :param bypass_checks: skip safety checks when True
    :param use_sstableloader: restore through sstableloader instead of file moves
    :param version_target: target Cassandra version, stored as-is
    """
    self.id = uuid.uuid4()
    self.ringmap = None
    self.cluster_backup = cluster_backup
    self.session_provider = None
    self.orchestration = Orchestration(config, parallel_restores)
    self.config = config
    self.host_list = host_list
    self.seed_target = seed_target
    self.keep_auth = keep_auth
    self.verify = verify
    self.in_place = None
    self.temp_dir = temp_dir  # temporary files
    self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
    self.host_map = {}  # Map of backup host/target host for the restore process
    # None-sentinel defaults avoid the shared mutable-default pitfall.
    self.keyspaces = keyspaces if keyspaces else {}
    self.tables = tables if tables else {}
    self.bypass_checks = bypass_checks
    self.use_sstableloader = use_sstableloader
    self.pssh_pool_size = parallel_restores
    self.cassandra = Cassandra(config)
    fqdn_resolver = medusa.utils.evaluate_boolean(
        self.config.cassandra.resolve_ip_addresses)
    self.fqdn_resolver = HostnameResolver(fqdn_resolver)
    self._version_target = version_target
def restore_node_locally(config, temp_dir, backup_name, in_place, keep_auth, seeds, storage,
                         keyspaces, tables):
    """Restore a backup on the local node by moving files into Cassandra's data directory.

    Stops Cassandra, wipes commitlogs and saved caches, moves the downloaded
    sstables in place, optionally waits for seed nodes, then restarts Cassandra
    (with explicit tokens unless restoring in place). Exits the process with
    status 1 when the backup is missing and 0 when nothing matches the
    keyspace/table filters.

    :return: the restored node backup
    """
    # Presence of the 'differential' marker blob decides the backup mode.
    differential_blob = storage.storage_driver.get_blob(
        os.path.join(config.storage.fqdn, backup_name, 'meta', 'differential'))
    node_backup = storage.get_node_backup(
        fqdn=config.storage.fqdn,
        name=backup_name,
        differential_mode=True if differential_blob is not None else False)
    if not node_backup.exists():
        logging.error('No such backup')
        sys.exit(1)
    fqtns_to_restore = get_fqtns_to_restore(keyspaces, tables, node_backup.manifest)
    if len(fqtns_to_restore) == 0:
        logging.error('There is nothing to restore')
        sys.exit(0)
    cassandra = Cassandra(config.cassandra)
    # Download the backup
    download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
    logging.info('Downloading data from backup to {}'.format(download_dir))
    download_data(config.storage, node_backup, fqtns_to_restore, destination=download_dir)
    logging.info('Stopping Cassandra')
    cassandra.shutdown()
    # Clean the commitlogs, the saved cache to prevent any kind of conflict
    # especially around system tables.
    clean_path(cassandra.commit_logs_path)
    clean_path(cassandra.saved_caches_path)
    # move backup data to Cassandra data directory according to system table
    logging.info('Moving backup data to Cassandra data directory')
    manifest = json.loads(node_backup.manifest)
    for section in manifest:
        fqtn = "{}.{}".format(section['keyspace'], section['columnfamily'])
        if fqtn not in fqtns_to_restore:
            logging.debug('Skipping restore for {}'.format(fqtn))
            continue
        maybe_restore_section(section, download_dir, cassandra.root, in_place, keep_auth)
    # Recover this node's tokens from the downloaded token map.
    node_fqdn = storage.config.fqdn
    token_map_file = download_dir / 'tokenmap.json'
    with open(str(token_map_file), 'r') as f:
        tokens = get_node_tokens(node_fqdn, f)
        logging.debug("Parsed tokens: {}".format(tokens))
    # possibly wait for seeds
    if seeds is not None:
        wait_for_seeds(config, seeds)
    else:
        logging.info('No --seeds specified so we will not wait for any')
    # Start up Cassandra
    logging.info('Starting Cassandra')
    # restoring in place retains system.local, which has tokens in it. no need to specify extra
    if in_place:
        cassandra.start_with_implicit_token()
    else:
        cassandra.start(tokens)
    return node_backup
def main(config, backup_name_arg, stagger_time, mode):
    """Run a node backup: save schema/tokenmap, optionally stagger, then upload.

    :param config: full Medusa configuration object
    :param backup_name_arg: explicit backup name; falls back to an hour-resolution timestamp
    :param stagger_time: optional timedelta-like window to wait for previous nodes
    :param mode: "differential" enables differential mode; any other value means full
    :return: tuple (actual_backup_duration, actual_start, end, node_backup,
             node_backup_cache, num_files, start)
    :raises: delegates failures to medusa.utils.handle_exception after emitting
             a monitoring event
    """
    start = datetime.datetime.now()
    backup_name = backup_name_arg or start.strftime('%Y%m%d%H')
    monitoring = Monitoring(config=config.monitoring)
    try:
        storage = Storage(config=config.storage)
        cassandra = Cassandra(config)
        differential_mode = False
        if mode == "differential":
            differential_mode = True
        node_backup = storage.get_node_backup(
            fqdn=config.storage.fqdn,
            name=backup_name,
            differential_mode=differential_mode
        )
        # Refuse to overwrite an existing backup with the same name.
        if node_backup.exists():
            raise IOError('Error: Backup {} already exists'.format(backup_name))
        # Make sure that priority remains to Cassandra/limiting backups resource usage
        try:
            throttle_backup()
        except Exception:
            logging.warning("Throttling backup impossible. It's probable that ionice is not available.")
        logging.info('Saving tokenmap and schema')
        schema, tokenmap = get_schema_and_tokenmap(cassandra)
        node_backup.schema = schema
        node_backup.tokenmap = json.dumps(tokenmap)
        if differential_mode is True:
            node_backup.differential = mode
        add_backup_start_to_index(storage, node_backup)
        # Optionally wait until previous nodes' backups completed, polling once a minute.
        if stagger_time:
            stagger_end = start + stagger_time
            logging.info('Staggering backup run, trying until {}'.format(stagger_end))
            while not stagger(config.storage.fqdn, storage, tokenmap):
                if datetime.datetime.now() < stagger_end:
                    logging.info('Staggering this backup run...')
                    time.sleep(60)
                else:
                    raise IOError('Backups on previous nodes did not complete'
                                  ' within our stagger time.')
        actual_start = datetime.datetime.now()
        num_files, node_backup_cache = do_backup(
            cassandra, node_backup, storage, differential_mode, config, backup_name)
        end = datetime.datetime.now()
        actual_backup_duration = end - actual_start
        print_backup_stats(actual_backup_duration, actual_start, end, node_backup,
                           node_backup_cache, num_files, start)
        update_monitoring(actual_backup_duration, backup_name, monitoring, node_backup)
        return (actual_backup_duration, actual_start, end, node_backup,
                node_backup_cache, num_files, start)
    except Exception as e:
        # Emit a monitoring event before delegating error handling.
        tags = ['medusa-node-backup', 'backup-error', backup_name]
        monitoring.send(tags, 1)
        medusa.utils.handle_exception(
            e,
            "This error happened during the backup: {}".format(str(e)),
            config
        )
def test_cassandra_missing_native_port(self):
    """Client encryption enabled but no native port defined: the SSL default is used."""
    node = Cassandra(
        self.get_simple_medusa_config('resources/yaml/original/cassandra-missing-native-port.yaml'))
    # With client_encryption_options enabled and no native ports defined,
    # the default encrypted native port applies.
    self.assertEqual(node.native_port, 9142)
class BackupJob(object):
    """Orchestrates a cluster-wide backup: snapshot on every node, then upload each node's snapshot."""

    def __init__(self, config, backup_name, seed_target, stagger, enable_md5_checks, mode, temp_dir,
                 parallel_snapshots, parallel_uploads):
        """Prepare the job; no remote work happens until execute() is called."""
        self.id = uuid.uuid4()
        # TODO expose the argument below (Note that min(1000, <number_of_hosts>) will be used)
        self.orchestration_snapshots = Orchestration(config, parallel_snapshots)
        self.orchestration_uploads = Orchestration(config, parallel_uploads)
        self.config = config
        self.backup_name = backup_name
        self.stagger = stagger
        self.seed_target = seed_target
        self.enable_md5_checks = enable_md5_checks
        self.mode = mode
        self.temp_dir = temp_dir
        # Per-job working directory, made unique by the job uuid.
        self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
        self.hosts = {}
        self.cassandra = Cassandra(config)
        # Snapshot tag is the Cassandra snapshot prefix followed by the backup name.
        self.snapshot_tag = '{}{}'.format(self.cassandra.SNAPSHOT_PREFIX, self.backup_name)
        fqdn_resolver = medusa.config.evaluate_boolean(
            self.config.cassandra.resolve_ip_addresses)
        self.fqdn_resolver = HostnameResolver(fqdn_resolver)

    def execute(self):
        """Discover the cluster's nodes, snapshot them all, then upload the snapshots."""
        # Two step: Take snapshot everywhere, then upload the backups to the external storage
        # Getting the list of Cassandra nodes.
        seed_target = self.seed_target if self.seed_target is not None else self.config.storage.fqdn
        session_provider = CqlSessionProvider([seed_target], self.config.cassandra)
        with session_provider.new_session() as session:
            tokenmap = session.tokenmap()
            self.hosts = [host for host in tokenmap.keys()]
        # First let's take a snapshot on all nodes at once
        # Here we will use parallelism of min(number of nodes, parallel_snapshots)
        logging.info('Creating snapshots on all nodes')
        self._create_snapshots()
        # Second
        logging.info('Uploading snapshots from nodes to external storage')
        self._upload_backup()

    def _create_snapshots(self):
        """Create the snapshot on every discovered host via pssh; raise if any node fails."""
        # Run snapshot in parallel on all nodes,
        create_snapshot_command = ' '.join(
            self.cassandra.create_snapshot_command(self.backup_name))
        pssh_run_success = self.orchestration_snapshots.\
            pssh_run(self.hosts, create_snapshot_command, hosts_variables={})
        if not pssh_run_success:
            # we could implement a retry.
            err_msg = 'Some nodes failed to create the snapshot.'
            logging.error(err_msg)
            raise Exception(err_msg)
        logging.info('A snapshot {} was created on all nodes.'.format(
            self.snapshot_tag))

    def _upload_backup(self):
        """Run the medusa backup-node command on every host via pssh; raise if any node fails."""
        backup_command = self._build_backup_cmd()
        # Run upload in parallel or sequentially according to parallel_uploads defined by the user
        pssh_run_success = self.orchestration_uploads.pssh_run(
            self.hosts, backup_command, hosts_variables={})
        if not pssh_run_success:
            # we could implement a retry.
            err_msg = 'Some nodes failed to upload the backup.'
            logging.error(err_msg)
            raise Exception(err_msg)
        logging.info('A new backup {} was created on all nodes.'.format(
            self.backup_name))

    def _build_backup_cmd(self):
        """Assemble the remote shell command that runs the per-node backup."""
        stagger_option = '--in-stagger {}'.format(
            self.stagger) if self.stagger else ''
        enable_md5_checks_option = '--enable-md5-checks' if self.enable_md5_checks else ''
        # Use %s placeholders in the below command to have them replaced by pssh using per host command substitution
        command = 'mkdir -p {work}; cd {work} && medusa-wrapper {sudo} medusa {config} -vvv backup-node ' \
                  '--backup-name {backup_name} {stagger} {enable_md5_checks} --mode {mode}' \
            .format(work=self.work_dir,
                    sudo='sudo' if medusa.utils.evaluate_boolean(self.config.cassandra.use_sudo) else '',
                    config=f'--config-file {self.config.file_path}' if self.config.file_path else '',
                    backup_name=self.backup_name,
                    stagger=stagger_option,
                    enable_md5_checks=enable_md5_checks_option,
                    mode=self.mode)
        logging.debug(
            'Running backup on all nodes with the following command {}'.format(
                command))
        return command
def restore_node_locally(config, temp_dir, backup_name, in_place, keep_auth, seeds, storage,
                         keyspaces, tables):
    """Restore a backup on the local node, with special handling for Kubernetes deployments.

    Outside Kubernetes, Cassandra is stopped and later restarted and seed waits
    apply; inside Kubernetes (config.kubernetes.enabled) lifecycle management is
    left to the cluster controller and sudo is not used. Exits the process with
    status 1 when the backup is missing and 0 when nothing matches the
    keyspace/table filters.

    :return: the restored node backup
    """
    storage.storage_driver.prepare_download()
    # Presence of the 'differential' marker blob decides the backup mode.
    differential_blob = storage.storage_driver.get_blob(
        os.path.join(config.storage.fqdn, backup_name, 'meta', 'differential'))
    node_backup = storage.get_node_backup(
        fqdn=config.storage.fqdn,
        name=backup_name,
        differential_mode=True if differential_blob is not None else False)
    if not node_backup.exists():
        logging.error('No such backup')
        sys.exit(1)
    fqtns_to_restore, ignored_fqtns = filter_fqtns(keyspaces, tables, node_backup.manifest)
    for fqtns in ignored_fqtns:
        logging.info('Skipping restore of {}'.format(fqtns))
    if len(fqtns_to_restore) == 0:
        logging.error('There is nothing to restore')
        sys.exit(0)
    cassandra = Cassandra(config)
    # Download the backup
    download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
    logging.info('Downloading data from backup to {}'.format(download_dir))
    download_data(config.storage, node_backup, fqtns_to_restore, destination=download_dir)
    # Outside Kubernetes we manage the Cassandra process ourselves.
    if not medusa.utils.evaluate_boolean(config.kubernetes.enabled):
        logging.info('Stopping Cassandra')
        cassandra.shutdown()
        wait_for_node_to_go_down(config, cassandra.hostname)
    # Clean the commitlogs, the saved cache to prevent any kind of conflict
    # especially around system tables.
    use_sudo = not medusa.utils.evaluate_boolean(config.kubernetes.enabled)
    clean_path(cassandra.commit_logs_path, use_sudo, keep_folder=True)
    clean_path(cassandra.saved_caches_path, use_sudo, keep_folder=True)
    # move backup data to Cassandra data directory according to system table
    logging.info('Moving backup data to Cassandra data directory')
    manifest = json.loads(node_backup.manifest)
    for section in manifest:
        fqtn = "{}.{}".format(section['keyspace'], section['columnfamily'])
        if fqtn not in fqtns_to_restore:
            logging.debug('Skipping restore for {}'.format(fqtn))
            continue
        maybe_restore_section(section, download_dir, cassandra.root, in_place, keep_auth, use_sudo)
    # Recover this node's tokens from the downloaded token map.
    node_fqdn = storage.config.fqdn
    token_map_file = download_dir / 'tokenmap.json'
    with open(str(token_map_file), 'r') as f:
        tokens = get_node_tokens(node_fqdn, f)
        logging.debug("Parsed tokens: {}".format(tokens))
    # possibly wait for seeds
    #
    # In a Kubernetes deployment we can assume that seed nodes will be started first. It will
    # be handled either by the statefulset controller or by the controller of a Cassandra
    # operator.
    if not medusa.utils.evaluate_boolean(config.kubernetes.enabled):
        if seeds is not None:
            wait_for_seeds(config, seeds)
        else:
            logging.info('No --seeds specified so we will not wait for any')
    # Start up Cassandra
    logging.info('Starting Cassandra')
    # restoring in place retains system.local, which has tokens in it. no need to specify extra
    if in_place:
        cassandra.start_with_implicit_token()
    else:
        cassandra.start(tokens)
    # Clean the restored data from local temporary folder
    clean_path(download_dir, use_sudo, keep_folder=False)
    return node_backup