Code example #1
    def test_yaml_token_enforcement_with_tokens_and_autobootstrap(self):
        shutil.copyfile('tests/resources/yaml/original/cassandra_with_tokens_and_autobootstrap.yaml',
                        'tests/resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml')
        config = configparser.ConfigParser(interpolation=None)
        config['cassandra'] = {
            'config_file': os.path.join(os.path.dirname(__file__),
                                        'resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml'),
            'start_cmd': '/etc/init.d/cassandra start',
            'stop_cmd': '/etc/init.d/cassandra stop',
            'is_ccm': '1'
        }

        medusa_config = MedusaConfig(
            storage=None,
            monitoring=None,
            cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
            ssh=None,
            checks=None,
            logging=None
        )

        cassandra = Cassandra(medusa_config.cassandra)
        tokens = ['1', '2', '3']
        cassandra.replaceTokensInCassandraYamlAndDisableBootstrap(tokens)

        with open('tests/resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml', 'r') as f:
            modified_yaml = yaml.load(f, Loader=yaml.BaseLoader)
            self.assertEqual(modified_yaml.get('num_tokens'), '3')
            self.assertEqual(modified_yaml.get('initial_token'), '1,2,3')
            self.assertEqual(modified_yaml.get('auto_bootstrap'), 'false')
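A minimal sketch of what the method under test plausibly does, inferred from the assertions above; the function name, standalone signature, and PyYAML round-trip are assumptions for illustration, not cassandra-medusa's actual implementation:

    import yaml

    def replace_tokens_and_disable_bootstrap(config_file, tokens):
        # Hypothetical: rewrite cassandra.yaml so the node starts with fixed
        # tokens and skips bootstrap (inferred from the test's assertions).
        with open(config_file, 'r') as f:
            yaml_dict = yaml.load(f, Loader=yaml.BaseLoader)

        yaml_dict['num_tokens'] = len(tokens)          # re-read as '3' by BaseLoader
        yaml_dict['initial_token'] = ','.join(tokens)  # '1,2,3'
        yaml_dict['auto_bootstrap'] = False            # serialized as 'false'

        with open(config_file, 'w') as f:
            yaml.safe_dump(yaml_dict, f)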
Code example #2
    def test_is_cass_default_ports_valid(self):
        # Not using ccm, directly checking cassandra health.
        medusa_config_v2 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c2.yaml')

        # Not using ccm, directly checking cassandra health.
        medusa_config_v3 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c3.yaml')
        # Not using ccm, directly checking cassandra health.
        medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c4.yaml')

        cassandra_v2 = Cassandra(medusa_config_v2, release_version="2")
        cassandra_v3 = Cassandra(medusa_config_v3, release_version="3")
        cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")

        assert cassandra_v2.rpc_port == 9160
        assert cassandra_v2.native_port == 9042
        assert cassandra_v2.storage_port == 7000

        assert cassandra_v3.rpc_port == 9160
        assert cassandra_v3.native_port == 9042
        assert cassandra_v3.storage_port == 7000

        assert cassandra_v4.rpc_port == 9160
        assert cassandra_v4.native_port == 9042
        assert cassandra_v4.storage_port == 7000
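The tests above call a get_simple_medusa_config helper that is not shown in this listing. A plausible sketch, assembled from the MedusaConfig construction in examples #8 and #19; treat the signature and defaults as assumptions rather than the project's actual fixture:

    def get_simple_medusa_config(self, yaml_file=None, is_ccm_active='1'):
        # Hypothetical test fixture: build a MedusaConfig whose [cassandra]
        # section points at the given test yaml.
        config = configparser.ConfigParser(interpolation=None)
        config['cassandra'] = {
            'config_file': os.path.join(os.path.dirname(__file__), yaml_file),
            'start_cmd': '/etc/init.d/cassandra start',
            'stop_cmd': '/etc/init.d/cassandra stop',
            'is_ccm': is_ccm_active
        }
        config['grpc'] = {'enabled': '0'}
        config['kubernetes'] = {'enabled': '0'}
        return MedusaConfig(
            file_path=None,
            storage=None,
            monitoring=None,
            cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
            ssh=None,
            checks=None,
            logging=None,
            grpc=_namedtuple_from_dict(GrpcConfig, config['grpc']),
            kubernetes=_namedtuple_from_dict(KubernetesConfig, config['kubernetes']),
        )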
Code example #3
File: index.py  Project: rubik-ai/cassandra-medusa
def build_indices(config, noop):
    """
    One-off function to construct the backup index.
    This function lists all complete cluster backups and all node backups in them.
    For all node backups found this way, it will find the latest one per node and update index accordingly.
    """
    try:
        storage = medusa.storage.Storage(config=config.storage)
        is_ccm = int(shlex.split(config.cassandra.is_ccm)[0])
        all_backups = []

        if is_ccm != 1:
            cassandra = Cassandra(config.cassandra)
            with cassandra.new_session() as cql_session:
                tokenmap = cql_session.tokenmap()
            for fqdn in tokenmap.keys():
                logging.info("processing {}".format(fqdn))
                all_backups = all_backups + list(
                    storage.discover_node_backups(fqdn=fqdn))
        else:
            all_backups = list(storage.discover_node_backups())

        latest_node_backups = dict()

        if noop:
            logging.info('--noop was set, will only print the indices')

        for node_backup in all_backups:
            # if we are dealing with a complete backup
            if node_backup.finished is not None:
                # check if this backup is newer than what was seen so far
                latest = latest_node_backups.get(node_backup.fqdn, node_backup)
                if node_backup.finished >= latest.finished:
                    latest_node_backups[node_backup.fqdn] = node_backup
                # unless --noop was set, add the node backup to the index
                logging.debug('Found backup {} from {}'.format(
                    node_backup.name, node_backup.fqdn))
                if not noop:
                    add_backup_start_to_index(storage, node_backup)
                    add_backup_finish_to_index(storage, node_backup)

        # once we have seen all backups, we can set the latest ones as well
        for fqdn, node_backup in latest_node_backups.items():
            logging.debug('Latest backup {} is {}'.format(
                fqdn, node_backup.name))
            if not noop:
                set_latest_backup_in_index(storage, node_backup)

    except Exception:
        traceback.print_exc()
        sys.exit(1)
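The loop's "latest backup per node" bookkeeping is equivalent to a grouped max; a compact restatement for clarity (variable names here are illustrative):

    from collections import defaultdict

    # finished_backups: the node backups that pass the 'finished' check above
    finished_backups = [nb for nb in all_backups if nb.finished is not None]

    # Group finished backups by node, then keep the most recent one per node.
    backups_by_fqdn = defaultdict(list)
    for nb in finished_backups:
        backups_by_fqdn[nb.fqdn].append(nb)
    latest_node_backups = {
        fqdn: max(backups, key=lambda nb: nb.finished)
        for fqdn, backups in backups_by_fqdn.items()
    }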
Code example #4
    def test_cassandra_start(self, subproc_mock, replace_tokens_mock):

        subproc_mock.return_value = None
        replace_tokens_mock.return_value = None
        subproc_mock.call_count = 0
        replace_tokens_mock.call_count = 0

        medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c4.yaml')
        cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")
        cassandra_v4.start(['test-token'])

        # When start is invoked with ccm inactive, expect both check_output and
        # replace_tokens to be invoked.
        assert subproc_mock.call_count == 1 and replace_tokens_mock.call_count == 1

        # And when start is invoked with ccm active, expect that only check_output is
        # called, as there is no need to replace tokens in the c* yaml or disable bootstrap.
        subproc_mock.call_count = 0
        replace_tokens_mock.call_count = 0

        medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="1",
                                                         yaml_file='resources/yaml/original/default-c4.yaml')
        cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")
        cassandra_v4.start(['test-token'])
        assert subproc_mock.call_count == 1 and replace_tokens_mock.call_count == 0
Code example #5
    def test_cassandra_client_encryption_enabled_default_port(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/cassandra-client-encrypt-default.yaml'))

        # Neither port is assigned, so the defaults are used
        self.assertEqual(c.native_port, 9142)
        self.assertEqual(c.rpc_port, 9160)
        self.assertEqual(c.storage_port, 7000)
Code example #6
    def test_parsing_custom_seed_provider(self):
        # patch a sample yaml to have a custom seed provider
        with open('tests/resources/yaml/original/cassandra_with_tokens.yaml',
                  'r') as fi:
            yaml_dict = yaml.load(fi, Loader=yaml.FullLoader)
            yaml_dict['seed_provider'] = [{
                'class_name': 'org.foo.bar.CustomSeedProvider'
            }]
            with open('tests/resources/yaml/work/cassandra_with_custom_seedprovider.yaml', 'w') as fo:
                yaml.safe_dump(yaml_dict, fo)

        # pass the patched yaml to cassandra config
        config = configparser.ConfigParser(interpolation=None)
        config['cassandra'] = {
            'config_file': os.path.join(os.path.dirname(__file__),
                                        'resources/yaml/work/cassandra_with_custom_seedprovider.yaml'),
            'start_cmd': '/etc/init.d/cassandra start',
            'stop_cmd': '/etc/init.d/cassandra stop',
            'is_ccm': '1'
        }
        cassandra_config = _namedtuple_from_dict(CassandraConfig, config['cassandra'])

        # init cassandra config and check the custom seed provider was ignored
        cassandra = Cassandra(cassandra_config)
        self.assertEqual([], sorted(cassandra.seeds))
Code example #7
    def __init__(self, config, backup_name, stagger, mode, temp_dir, parallel_snapshots, parallel_uploads):
        self.id = uuid.uuid4()
        # TODO expose the argument below (Note that min(1000, <number_of_hosts>) will be used)
        self.orchestration_snapshots = Orchestration(config, parallel_snapshots)
        self.orchestration_uploads = Orchestration(config, parallel_uploads)
        self.config = config
        self.backup_name = backup_name
        self.stagger = stagger
        self.mode = mode
        self.temp_dir = temp_dir
        self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
        self.hosts = {}
        self.cassandra = Cassandra(config)
        self.snapshot_tag = '{}{}'.format(self.cassandra.SNAPSHOT_PREFIX, self.backup_name)
        fqdn_resolver = medusa.config.evaluate_boolean(self.config.cassandra.resolve_ip_addresses)
        self.fqdn_resolver = HostnameResolver(fqdn_resolver)
Code example #8
    def test_seed_parsing(self):
        shutil.copyfile('tests/resources/yaml/original/cassandra_with_tokens_and_autobootstrap.yaml',
                        'tests/resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml')
        config = configparser.ConfigParser(interpolation=None)
        config['cassandra'] = {
            'config_file': os.path.join(os.path.dirname(__file__),
                                        'resources/yaml/work/cassandra_with_tokens_and_autobootstrap.yaml'),
            'start_cmd': '/etc/init.d/cassandra start',
            'stop_cmd': '/etc/init.d/cassandra stop',
            'is_ccm': '1'
        }
        config["grpc"] = {
            "enabled": "0"
        }
        config['kubernetes'] = {
            "enabled": "0"
        }
        medusa_config = MedusaConfig(
            file_path=None,
            storage=None,
            monitoring=None,
            cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
            ssh=None,
            checks=None,
            logging=None,
            grpc=_namedtuple_from_dict(GrpcConfig, config['grpc']),
            kubernetes=_namedtuple_from_dict(KubernetesConfig, config['kubernetes']),
        )

        cassandra = Cassandra(medusa_config)
        self.assertEqual(["127.0.0.1", "127.0.0.2"], sorted(cassandra.seeds))
Code example #9
    def test_cassandra_client_encryption_enabled_reuse_port(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/cassandra-client-encrypt.yaml'))

        # With client_encryption_options enabled and native_transport_port_ssl not defined,
        # expect the defined native_transport_port value to be used.
        self.assertEqual(c.native_port, 9123)
        self.assertEqual(c.rpc_port, 9160)
        self.assertEqual(c.storage_port, 7000)
Code example #10
    def test_cassandra_internode_encryption_v4_ports(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/'
                                                    'cassandra-internode-encrypt.yaml'), release_version="4")

        # Uses ssl_storage_port value
        self.assertEqual(c.storage_port, 10001)
        self.assertEqual(c.native_port, 9042)
        self.assertEqual(c.rpc_port, 9160)
Code example #11
    def test_cassandra_client_encryption_enabled_ssl_port(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/cassandra-client-encrypt-sslport.yaml'))

        # server_encryption_options/internode_encryption set to 'all', with both
        # native_transport_port_ssl AND native_transport_port defined.
        # Expected to use native_transport_port_ssl.
        self.assertEqual(c.storage_port, 7001)
        self.assertEqual(c.native_port, 18675)
        self.assertEqual(c.rpc_port, 9160)
Code example #12
    def test_cassandra_non_encrypt_comm_ports(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/cassandra-no-encrypt.yaml'),
                      release_version="4")

        # No encryption: use the provided storage_port value even though ssl_storage_port is set as well.
        self.assertEqual(c.storage_port, 15000)

        self.assertEqual(c.native_port, 9777)
        self.assertEqual(c.rpc_port, 9160)
Code example #13
    def test_cassandra_internode_encrypt_v4_default_ports(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/'
                                                    'cassandra-internode-encrypt-default.yaml'), release_version="4")

        # Secure connection desired, but neither ssl_storage_port nor storage_port is defined.
        # On the v4 release, this should default to 7001.
        self.assertEqual(c.storage_port, 7001)
        self.assertEqual(c.native_port, 9042)
        self.assertEqual(c.rpc_port, 9160)
Code example #14
    def test_is_cassandra_v2_healthy(self, fm):
        fm.return_value = True
        host = Mock()

        medusa_config_v2 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c2.yaml')
        cassandra_v2 = Cassandra(medusa_config_v2, release_version="2")

        # With c* version 2, expect all three ports to be checked.
        self.assertTrue(is_cassandra_healthy("all", cassandra_v2, host))
        assert fm.call_count == 3
Code example #15
    def test_cassandra_internode_encrypt_nossl_v4_default_ports(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/'
                                                    'cassandra-internode-encrypt-nossl-default.yaml'),
                      release_version="4")

        # Secure connection desired, but ssl_storage_port is not defined.
        # On c* v4, the value of the specified storage_port is used.
        self.assertEqual(c.storage_port, 8675)
        self.assertEqual(c.native_port, 9042)
        self.assertEqual(c.rpc_port, 9160)
Code example #16
    def test_is_cassandra_healthy_invalid_input(self):
        host = Mock()
        # Not using ccm, directly checking cassandra health.
        medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c4.yaml')
        cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")

        # invalid cassandra input
        self.assertFalse(is_cassandra_healthy("all", {}, host))

        # invalid host input
        self.assertFalse(is_cassandra_healthy("all", cassandra_v4, {}))
Code example #17
    def test_is_cassandra_v3_healthy(self, fm):
        fm.return_value = True
        host = Mock()

        # Not using ccm, directly checking cassandra health.
        medusa_config_v3 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c3.yaml')
        cassandra_v3 = Cassandra(medusa_config_v3, release_version="3")

        # With c* version 3, expect all three ports to be checked.
        self.assertTrue(is_cassandra_healthy("all", cassandra_v3, host))
        assert fm.call_count == 3
Code example #18
def handle_backup(config, backup_name_arg, stagger_time,
                  enable_md5_checks_flag, mode):
    start = datetime.datetime.now()
    backup_name = backup_name_arg or start.strftime('%Y%m%d%H%M')
    monitoring = Monitoring(config=config.monitoring)

    try:
        logging.debug(
            "Starting backup preparations with Mode: {}".format(mode))
        storage = Storage(config=config.storage)
        cassandra = Cassandra(config)

        differential_mode = False
        if mode == "differential":
            differential_mode = True

        node_backup = storage.get_node_backup(
            fqdn=config.storage.fqdn,
            name=backup_name,
            differential_mode=differential_mode)
        if node_backup.exists():
            raise IOError(
                'Error: Backup {} already exists'.format(backup_name))

        # Starting the backup
        logging.info(
            "Starting backup using Stagger: {} Mode: {} Name: {}".format(
                stagger_time, mode, backup_name))
        BackupMan.update_backup_status(backup_name,
                                       BackupMan.STATUS_IN_PROGRESS)
        info = start_backup(storage, node_backup, cassandra, differential_mode,
                            stagger_time, start, mode, enable_md5_checks_flag,
                            backup_name, config, monitoring)
        BackupMan.update_backup_status(backup_name, BackupMan.STATUS_SUCCESS)

        logging.debug("Done with backup, returning backup result information")
        return (info["actual_backup_duration"], info["actual_start_time"],
                info["end_time"], info["node_backup"],
                info["node_backup_cache"], info["num_files"],
                info["start_time"], info["backup_name"])

    except Exception as e:
        logging.error(
            "Issue occurred inside handle_backup Name: {} Error: {}".format(
                backup_name, str(e)))
        BackupMan.update_backup_status(backup_name, BackupMan.STATUS_FAILED)

        tags = ['medusa-node-backup', 'backup-error', backup_name]
        monitoring.send(tags, 1)
        medusa.utils.handle_exception(
            e, "Error occurred during backup: {}".format(str(e)), config)
Code example #19
    def test_yaml_token_enforcement_no_tokens(self):
        shutil.copyfile('tests/resources/yaml/original/cassandra_no_tokens.yaml',
                        'tests/resources/yaml/work/cassandra_no_tokens.yaml')
        config = configparser.ConfigParser(interpolation=None)
        config['cassandra'] = {
            'config_file': os.path.join(os.path.dirname(__file__), 'resources/yaml/work/cassandra_no_tokens.yaml'),
            'start_cmd': '/etc/init.d/cassandra start',
            'stop_cmd': '/etc/init.d/cassandra stop',
            'is_ccm': '1'
        }
        config["grpc"] = {
            "enabled": "0"
        }
        config['kubernetes'] = {
            "enabled": "0"
        }
        medusa_config = MedusaConfig(
            file_path=None,
            storage=None,
            monitoring=None,
            cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
            ssh=None,
            checks=None,
            logging=None,
            grpc=_namedtuple_from_dict(GrpcConfig, config['grpc']),
            kubernetes=_namedtuple_from_dict(KubernetesConfig, config['kubernetes']),
        )

        cassandra = Cassandra(medusa_config)
        tokens = ['1', '2', '3']
        cassandra.replace_tokens_in_cassandra_yaml_and_disable_bootstrap(tokens)

        with open('tests/resources/yaml/work/cassandra_no_tokens.yaml', 'r') as f:
            modified_yaml = yaml.load(f, Loader=yaml.BaseLoader)
            self.assertEqual(modified_yaml.get('num_tokens'), '3')
            self.assertEqual(modified_yaml.get('initial_token'), '1,2,3')
            self.assertEqual(modified_yaml.get('auto_bootstrap'), 'false')
Code example #20
    def test_is_cassandra_healthy_check_type_unknown(self, is_open):
        host = Mock()
        # Not using ccm, directly checking cassandra health.
        medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c4.yaml')
        cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")

        is_open.return_value = True
        self.assertTrue(is_cassandra_healthy("not-thrift-and-not-all-check", cassandra_v4, host))
        assert is_open.call_count == 1

        is_open.call_count = 0
        is_open.return_value = False
        self.assertFalse(is_cassandra_healthy("not-thrift-and-not-all-check", cassandra_v4, host))
        assert is_open.call_count == 2
Code example #21
    def test_is_cassandra_healthy_check_types(self, fm):

        host = Mock()

        # Not using ccm, directly checking cassandra health.
        medusa_config_v4 = self.get_simple_medusa_config(is_ccm_active="0",
                                                         yaml_file='resources/yaml/original/default-c4.yaml')

        cassandra_v4 = Cassandra(medusa_config_v4, release_version="4")

        # With c* version 4 and all ports reported open, every check type passes.
        fm.return_value = True
        self.assertTrue(is_cassandra_healthy("all", cassandra_v4, host))
        self.assertTrue(is_cassandra_healthy("thrift", cassandra_v4, host))
        self.assertTrue(is_cassandra_healthy("unknown", cassandra_v4, host))
Code example #22
    def test_parsing_custom_seed_provider(self):
        # patch a sample yaml to have a custom seed provider
        with open('tests/resources/yaml/original/cassandra_with_tokens.yaml',
                  'r') as fi:
            yaml_dict = yaml.load(fi, Loader=yaml.FullLoader)
            yaml_dict['seed_provider'] = [{
                'class_name': 'org.foo.bar.CustomSeedProvider'
            }]
            with open('tests/resources/yaml/work/cassandra_with_custom_seedprovider.yaml', 'w') as fo:
                yaml.safe_dump(yaml_dict, fo)

        # pass the patched yaml to cassandra config
        config = configparser.ConfigParser(interpolation=None)
        config['cassandra'] = {
            'config_file': os.path.join(os.path.dirname(__file__),
                                        'resources/yaml/work/cassandra_with_custom_seedprovider.yaml'),
            'start_cmd': '/etc/init.d/cassandra start',
            'stop_cmd': '/etc/init.d/cassandra stop',
            'is_ccm': '1'
        }
        config["grpc"] = {"enabled": "0"}
        config['kubernetes'] = {"enabled": "0"}
        medusa_config = MedusaConfig(
            storage=None,
            monitoring=None,
            cassandra=_namedtuple_from_dict(CassandraConfig, config['cassandra']),
            ssh=None,
            checks=None,
            logging=None,
            grpc=_namedtuple_from_dict(GrpcConfig, config['grpc']),
            kubernetes=_namedtuple_from_dict(KubernetesConfig, config['kubernetes']),
        )
        cassandra = Cassandra(medusa_config)
        self.assertEqual([], sorted(cassandra.seeds))
Code example #23
def restore_node_sstableloader(config, temp_dir, backup_name, in_place,
                               keep_auth, seeds, storage, keyspaces, tables):
    cassandra = Cassandra(config)
    node_backup = None
    fqdns = config.storage.fqdn.split(",")
    for fqdn in fqdns:
        differential_blob = storage.storage_driver.get_blob(
            os.path.join(fqdn, backup_name, 'meta', 'differential'))

        node_backup = storage.get_node_backup(
            fqdn=fqdn,
            name=backup_name,
            differential_mode=differential_blob is not None)

        if not node_backup.exists():
            logging.error('No such backup')
            sys.exit(1)

        fqtns_to_restore, ignored_fqtns = filter_fqtns(keyspaces, tables,
                                                       node_backup.manifest)

        for fqtns in ignored_fqtns:
            logging.info('Skipping restore of {}'.format(fqtns))

        if len(fqtns_to_restore) == 0:
            logging.error('There is nothing to restore')
            sys.exit(0)

        # Download the backup
        download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
        logging.info('Downloading data from backup to {}'.format(download_dir))
        download_data(config.storage,
                      node_backup,
                      fqtns_to_restore,
                      destination=download_dir)
        invoke_sstableloader(config, download_dir, keep_auth, fqtns_to_restore,
                             cassandra.storage_port)
        logging.info('Finished loading backup from {}'.format(fqdn))

    # Clean the restored data from local temporary folder
    clean_path(download_dir, keep_folder=False)
    return node_backup
Code example #24
    def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
                 pssh_pool_size, keyspaces={}, tables={}, bypass_checks=False, use_sstableloader=False):
        self.id = uuid.uuid4()
        self.ringmap = None
        self.cluster_backup = cluster_backup
        self.session_provider = None
        self.config = config
        self.host_list = host_list
        self.seed_target = seed_target
        self.keep_auth = keep_auth
        self.verify = verify
        self.in_place = None
        self.temp_dir = temp_dir  # temporary files
        self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
        self.host_map = {}  # Map of backup host/target host for the restore process
        self.keyspaces = keyspaces
        self.tables = tables
        self.bypass_checks = bypass_checks
        self.use_sstableloader = use_sstableloader
        self.pssh_pool_size = pssh_pool_size
        self.cassandra = Cassandra(config.cassandra)
Code example #25
    def __init__(self,
                 cluster_backup,
                 config,
                 temp_dir,
                 host_list,
                 seed_target,
                 keep_auth,
                 verify,
                 parallel_restores,
                 keyspaces=None,
                 tables=None,
                 bypass_checks=False,
                 use_sstableloader=False,
                 version_target=None):

        self.id = uuid.uuid4()
        self.ringmap = None
        self.cluster_backup = cluster_backup
        self.session_provider = None
        self.orchestration = Orchestration(config, parallel_restores)
        self.config = config
        self.host_list = host_list
        self.seed_target = seed_target
        self.keep_auth = keep_auth
        self.verify = verify
        self.in_place = None
        self.temp_dir = temp_dir  # temporary files
        self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
        self.host_map = {}  # Map of backup host/target host for the restore process
        self.keyspaces = keyspaces if keyspaces else {}
        self.tables = tables if tables else {}
        self.bypass_checks = bypass_checks
        self.use_sstableloader = use_sstableloader
        self.pssh_pool_size = parallel_restores
        self.cassandra = Cassandra(config)
        fqdn_resolver = medusa.utils.evaluate_boolean(
            self.config.cassandra.resolve_ip_addresses)
        self.fqdn_resolver = HostnameResolver(fqdn_resolver)
        self._version_target = version_target
Code example #26
def restore_node_locally(config, temp_dir, backup_name, in_place, keep_auth,
                         seeds, storage, keyspaces, tables):
    differential_blob = storage.storage_driver.get_blob(
        os.path.join(config.storage.fqdn, backup_name, 'meta', 'differential'))

    node_backup = storage.get_node_backup(
        fqdn=config.storage.fqdn,
        name=backup_name,
        differential_mode=differential_blob is not None)

    if not node_backup.exists():
        logging.error('No such backup')
        sys.exit(1)

    fqtns_to_restore = get_fqtns_to_restore(keyspaces, tables,
                                            node_backup.manifest)
    if len(fqtns_to_restore) == 0:
        logging.error('There is nothing to restore')
        sys.exit(0)

    cassandra = Cassandra(config.cassandra)

    # Download the backup
    download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
    logging.info('Downloading data from backup to {}'.format(download_dir))
    download_data(config.storage,
                  node_backup,
                  fqtns_to_restore,
                  destination=download_dir)

    logging.info('Stopping Cassandra')
    cassandra.shutdown()

    # Clean the commitlogs, the saved cache to prevent any kind of conflict
    # especially around system tables.
    clean_path(cassandra.commit_logs_path)
    clean_path(cassandra.saved_caches_path)

    # move backup data to Cassandra data directory according to system table
    logging.info('Moving backup data to Cassandra data directory')
    manifest = json.loads(node_backup.manifest)
    for section in manifest:
        fqtn = "{}.{}".format(section['keyspace'], section['columnfamily'])
        if fqtn not in fqtns_to_restore:
            logging.debug('Skipping restore for {}'.format(fqtn))
            continue
        maybe_restore_section(section, download_dir, cassandra.root, in_place,
                              keep_auth)

    node_fqdn = storage.config.fqdn
    token_map_file = download_dir / 'tokenmap.json'
    with open(str(token_map_file), 'r') as f:
        tokens = get_node_tokens(node_fqdn, f)
        logging.debug("Parsed tokens: {}".format(tokens))

    # possibly wait for seeds
    if seeds is not None:
        wait_for_seeds(config, seeds)
    else:
        logging.info('No --seeds specified so we will not wait for any')

    # Start up Cassandra
    logging.info('Starting Cassandra')
    # restoring in place retains system.local, which has tokens in it. no need to specify extra
    if in_place:
        cassandra.start_with_implicit_token()
    else:
        cassandra.start(tokens)

    return node_backup
Code example #27
def main(config, backup_name_arg, stagger_time, mode):
    start = datetime.datetime.now()
    backup_name = backup_name_arg or start.strftime('%Y%m%d%H')
    monitoring = Monitoring(config=config.monitoring)

    try:
        storage = Storage(config=config.storage)
        cassandra = Cassandra(config)

        differential_mode = False
        if mode == "differential":
            differential_mode = True

        node_backup = storage.get_node_backup(
            fqdn=config.storage.fqdn,
            name=backup_name,
            differential_mode=differential_mode
        )

        if node_backup.exists():
            raise IOError('Error: Backup {} already exists'.format(backup_name))

        # Make sure Cassandra keeps priority by limiting the backup's resource usage
        try:
            throttle_backup()
        except Exception:
            logging.warning("Throttling backup impossible. It's probable that ionice is not available.")

        logging.info('Saving tokenmap and schema')
        schema, tokenmap = get_schema_and_tokenmap(cassandra)

        node_backup.schema = schema
        node_backup.tokenmap = json.dumps(tokenmap)
        if differential_mode is True:
            node_backup.differential = mode
        add_backup_start_to_index(storage, node_backup)

        if stagger_time:
            stagger_end = start + stagger_time
            logging.info('Staggering backup run, trying until {}'.format(stagger_end))
            while not stagger(config.storage.fqdn, storage, tokenmap):
                if datetime.datetime.now() < stagger_end:
                    logging.info('Staggering this backup run...')
                    time.sleep(60)
                else:
                    raise IOError('Backups on previous nodes did not complete'
                                  ' within our stagger time.')

        actual_start = datetime.datetime.now()

        num_files, node_backup_cache = do_backup(
            cassandra, node_backup, storage, differential_mode, config, backup_name)

        end = datetime.datetime.now()
        actual_backup_duration = end - actual_start

        print_backup_stats(actual_backup_duration, actual_start, end, node_backup, node_backup_cache, num_files, start)

        update_monitoring(actual_backup_duration, backup_name, monitoring, node_backup)
        return (actual_backup_duration, actual_start, end, node_backup, node_backup_cache, num_files, start)

    except Exception as e:
        tags = ['medusa-node-backup', 'backup-error', backup_name]
        monitoring.send(tags, 1)
        medusa.utils.handle_exception(
            e,
            "This error happened during the backup: {}".format(str(e)),
            config
        )
Code example #28
    def test_cassandra_missing_native_port(self):
        c = Cassandra(self.get_simple_medusa_config('resources/yaml/original/cassandra-missing-native-port.yaml'))

        # Case where client_encryption_options is enabled but no native ports are defined.
        self.assertEqual(c.native_port, 9142)
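Taken together, the client-encryption port tests (examples #5, #9, #11 and #28) pin down the native-port resolution order. A condensed sketch inferred from those assertions; the yaml keys are real cassandra.yaml settings, but the helper itself and its exact reading of 'enabled' are assumptions, not cassandra-medusa's code:

    def native_port_from_yaml(yaml_dict):
        client_enc = yaml_dict.get('client_encryption_options', {})
        if client_enc.get('enabled') in (True, 'true'):
            # Prefer the dedicated SSL port (example #11), fall back to the
            # plain port (example #9), then to 9142 (examples #5 and #28).
            return (yaml_dict.get('native_transport_port_ssl')
                    or yaml_dict.get('native_transport_port')
                    or 9142)
        # Without client encryption the plain port applies, defaulting to 9042.
        return yaml_dict.get('native_transport_port') or 9042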
Code example #29
class BackupJob(object):
    def __init__(self, config, backup_name, seed_target, stagger,
                 enable_md5_checks, mode, temp_dir, parallel_snapshots,
                 parallel_uploads):
        self.id = uuid.uuid4()
        # TODO expose the argument below (Note that min(1000, <number_of_hosts>) will be used)
        self.orchestration_snapshots = Orchestration(config,
                                                     parallel_snapshots)
        self.orchestration_uploads = Orchestration(config, parallel_uploads)
        self.config = config
        self.backup_name = backup_name
        self.stagger = stagger
        self.seed_target = seed_target
        self.enable_md5_checks = enable_md5_checks
        self.mode = mode
        self.temp_dir = temp_dir
        self.work_dir = self.temp_dir / 'medusa-job-{id}'.format(id=self.id)
        self.hosts = {}
        self.cassandra = Cassandra(config)
        self.snapshot_tag = '{}{}'.format(self.cassandra.SNAPSHOT_PREFIX,
                                          self.backup_name)
        fqdn_resolver = medusa.config.evaluate_boolean(
            self.config.cassandra.resolve_ip_addresses)
        self.fqdn_resolver = HostnameResolver(fqdn_resolver)

    def execute(self):
        # Two steps: take a snapshot everywhere, then upload the backups to the external storage

        # Getting the list of Cassandra nodes.
        seed_target = self.seed_target if self.seed_target is not None else self.config.storage.fqdn
        session_provider = CqlSessionProvider([seed_target],
                                              self.config.cassandra)
        with session_provider.new_session() as session:
            tokenmap = session.tokenmap()
            self.hosts = [host for host in tokenmap.keys()]

        # First let's take a snapshot on all nodes at once
        # Here we will use parallelism of min(number of nodes, parallel_snapshots)
        logging.info('Creating snapshots on all nodes')
        self._create_snapshots()

        # Second, upload the snapshots to external storage
        logging.info('Uploading snapshots from nodes to external storage')
        self._upload_backup()

    def _create_snapshots(self):
        # Run the snapshot in parallel on all nodes.
        create_snapshot_command = ' '.join(
            self.cassandra.create_snapshot_command(self.backup_name))
        pssh_run_success = self.orchestration_snapshots.\
            pssh_run(self.hosts,
                     create_snapshot_command,
                     hosts_variables={})
        if not pssh_run_success:
            # we could implement a retry.
            err_msg = 'Some nodes failed to create the snapshot.'
            logging.error(err_msg)
            raise Exception(err_msg)

        logging.info('A snapshot {} was created on all nodes.'.format(
            self.snapshot_tag))

    def _upload_backup(self):
        backup_command = self._build_backup_cmd()
        # Run upload in parallel or sequentially according to parallel_uploads defined by the user
        pssh_run_success = self.orchestration_uploads.pssh_run(
            self.hosts, backup_command, hosts_variables={})
        if not pssh_run_success:
            # we could implement a retry.
            err_msg = 'Some nodes failed to upload the backup.'
            logging.error(err_msg)
            raise Exception(err_msg)

        logging.info('A new backup {} was created on all nodes.'.format(
            self.backup_name))

    def _build_backup_cmd(self):
        stagger_option = '--in-stagger {}'.format(
            self.stagger) if self.stagger else ''
        enable_md5_checks_option = '--enable-md5-checks' if self.enable_md5_checks else ''

        # Build the backup-node command that pssh runs identically on every host
        command = 'mkdir -p {work}; cd {work} && medusa-wrapper {sudo} medusa {config} -vvv backup-node ' \
                  '--backup-name {backup_name} {stagger} {enable_md5_checks} --mode {mode}' \
            .format(work=self.work_dir,
                    sudo='sudo' if medusa.utils.evaluate_boolean(self.config.cassandra.use_sudo) else '',
                    config=f'--config-file {self.config.file_path}' if self.config.file_path else '',
                    backup_name=self.backup_name,
                    stagger=stagger_option,
                    enable_md5_checks=enable_md5_checks_option,
                    mode=self.mode)

        logging.debug(
            'Running backup on all nodes with the following command {}'.format(
                command))

        return command
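For reference, a rendering of the command template above under illustrative values (the work dir, config path, backup name, and stagger value are made up):

    mkdir -p /tmp/medusa-job-<uuid>; cd /tmp/medusa-job-<uuid> && medusa-wrapper sudo medusa --config-file /etc/medusa/medusa.ini -vvv backup-node --backup-name nightly-2024 --in-stagger 3600 --enable-md5-checks --mode differential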
Code example #30
def restore_node_locally(config, temp_dir, backup_name, in_place, keep_auth,
                         seeds, storage, keyspaces, tables):
    storage.storage_driver.prepare_download()
    differential_blob = storage.storage_driver.get_blob(
        os.path.join(config.storage.fqdn, backup_name, 'meta', 'differential'))

    node_backup = storage.get_node_backup(
        fqdn=config.storage.fqdn,
        name=backup_name,
        differential_mode=differential_blob is not None)

    if not node_backup.exists():
        logging.error('No such backup')
        sys.exit(1)

    fqtns_to_restore, ignored_fqtns = filter_fqtns(keyspaces, tables,
                                                   node_backup.manifest)
    for fqtns in ignored_fqtns:
        logging.info('Skipping restore of {}'.format(fqtns))

    if len(fqtns_to_restore) == 0:
        logging.error('There is nothing to restore')
        sys.exit(0)

    cassandra = Cassandra(config)

    # Download the backup
    download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
    logging.info('Downloading data from backup to {}'.format(download_dir))
    download_data(config.storage,
                  node_backup,
                  fqtns_to_restore,
                  destination=download_dir)

    if not medusa.utils.evaluate_boolean(config.kubernetes.enabled):
        logging.info('Stopping Cassandra')
        cassandra.shutdown()
        wait_for_node_to_go_down(config, cassandra.hostname)

    # Clean the commitlogs, the saved cache to prevent any kind of conflict
    # especially around system tables.
    use_sudo = not medusa.utils.evaluate_boolean(config.kubernetes.enabled)
    clean_path(cassandra.commit_logs_path, use_sudo, keep_folder=True)
    clean_path(cassandra.saved_caches_path, use_sudo, keep_folder=True)

    # move backup data to Cassandra data directory according to system table
    logging.info('Moving backup data to Cassandra data directory')
    manifest = json.loads(node_backup.manifest)
    for section in manifest:
        fqtn = "{}.{}".format(section['keyspace'], section['columnfamily'])
        if fqtn not in fqtns_to_restore:
            logging.debug('Skipping restore for {}'.format(fqtn))
            continue
        maybe_restore_section(section, download_dir, cassandra.root, in_place,
                              keep_auth, use_sudo)

    node_fqdn = storage.config.fqdn
    token_map_file = download_dir / 'tokenmap.json'
    with open(str(token_map_file), 'r') as f:
        tokens = get_node_tokens(node_fqdn, f)
        logging.debug("Parsed tokens: {}".format(tokens))

    # possibly wait for seeds
    #
    # In a Kubernetes deployment we can assume that seed nodes will be started first. It will
    # handled either by the statefulset controller or by the controller of a Cassandra
    # operator.
    if not medusa.utils.evaluate_boolean(config.kubernetes.enabled):
        if seeds is not None:
            wait_for_seeds(config, seeds)
        else:
            logging.info('No --seeds specified so we will not wait for any')

        # Start up Cassandra
        logging.info('Starting Cassandra')
        # restoring in place retains system.local, which has tokens in it. no need to specify extra
        if in_place:
            cassandra.start_with_implicit_token()
        else:
            cassandra.start(tokens)

    # Clean the restored data from local temporary folder
    clean_path(download_dir, use_sudo, keep_folder=False)
    return node_backup