    def latest_node_backup(self, *, fqdn):
        index_path = 'index/latest_backup/{}/backup_name.txt'.format(fqdn)
        try:
            latest_backup_name = self.storage_driver.get_blob_content_as_string(
                index_path)
            differential_blob = self.storage_driver.get_blob(
                '{}/{}/meta/differential'.format(fqdn, latest_backup_name))
            # Should be removed after a while. Here for backwards compatibility.
            incremental_blob = self.storage_driver.get_blob(
                '{}/{}/meta/incremental'.format(fqdn, latest_backup_name))

            node_backup = NodeBackup(
                storage=self,
                fqdn=fqdn,
                name=latest_backup_name,
                differential_blob=differential_blob
                if differential_blob is not None else incremental_blob)

            if not node_backup.exists():
                logging.warning(
                    'Latest backup points to a non-existent backup. Deleting the marker'
                )
                self.remove_latest_backup_marker(fqdn)
                raise Exception('Latest backup points to a non-existent backup')

            return node_backup

        except Exception:
            logging.info('Node {} does not have a latest backup'.format(fqdn))
            return None
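    # A minimal usage sketch (hypothetical caller, not part of the original code):
    # assuming a Storage instance named `storage` has been built elsewhere, the most
    # recent backup for a node can be looked up through the index marker like this
    # (the fqdn value is purely illustrative).
    #
    #     latest = storage.latest_node_backup(fqdn='cassandra-node-1.example.com')
    #     if latest is not None:
    #         print(latest.name)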
    def get_node_backup(self, *, fqdn, name, differential_mode=False):
        return NodeBackup(
            storage=self,
            name=name,
            fqdn=fqdn,
            differential_mode=differential_mode
        )
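    # A minimal usage sketch (hypothetical, not part of the original code): building a
    # handle for a specific backup by name, e.g. before checking whether it exists.
    #
    #     nb = storage.get_node_backup(fqdn='cassandra-node-1.example.com', name='backup1')
    #     print(nb.exists())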
    def discover_node_backups(self, *, fqdn=None):
        """
        Discovers node backups by traversing data folders.
        This operation is very taxing for cloud backends and should be avoided.
        We keep it in the codebase for the sole reason of allowing the compute-backup-indices to work.
        """
        def get_backup_name_from_blob(blob):
            blob_path = pathlib.Path(blob.name)
            if self.prefix_path == '':
                fqdn, name, *_ = blob_path.parts
            else:
                _, fqdn, name, *_ = blob_path.parts
            return fqdn, name

        def is_schema_blob(blob):
            return blob.name.endswith('/schema.cql')

        def includes_schema_blob(blobs):
            return any(map(is_schema_blob, blobs))

        prefix_path = fqdn if fqdn else ''

        logging.debug("Listing blobs with prefix '{}'".format(prefix_path))

        storage_objects = filter(
            lambda blob: "meta" in blob.name,
            self.storage_driver.list_objects(path=prefix_path))

        all_blobs = sorted(storage_objects, key=operator.attrgetter('name'))

        logging.debug("Finished listing blobs")

        for (fqdn, backup_name), blobs in itertools.groupby(
                all_blobs, key=get_backup_name_from_blob):
            # consume the _blobs_ iterator into a list because we need to traverse it twice
            backup_blobs = list(blobs)
            if includes_schema_blob(backup_blobs):
                logging.debug("Found backup {}.{}".format(fqdn, backup_name))
                yield NodeBackup(storage=self,
                                 fqdn=fqdn,
                                 name=backup_name,
                                 preloaded_blobs=backup_blobs)
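    # A minimal usage sketch (hypothetical caller, not part of the original code):
    # discover_node_backups is a generator, so it can be iterated lazily. As the
    # docstring above notes, this traverses the data folders and is expensive on
    # cloud backends, so it should normally only back the index-building path.
    #
    #     for node_backup in storage.discover_node_backups():
    #         logging.info('Discovered backup {} for {}'.format(
    #             node_backup.name, node_backup.fqdn))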
    def list_node_backups(self, *, fqdn=None, backup_index_blobs=None):
        """
        Lists node backups using the index.
        If there is no backup index, no backups will be found.
        Use discover_node_backups to discover backups from the data folders.
        """
        def is_tokenmap_file(blob):
            return "tokenmap" in blob.name

        def get_blob_name(blob):
            return blob.name

        def get_all_backup_blob_names(blobs):
            # if the tokenmap file exists, we assume the whole backup exists too
            all_backup_blobs = filter(is_tokenmap_file, blobs)
            return list(map(get_blob_name, all_backup_blobs))

        def get_blobs_for_fqdn(blobs, fqdn):
            return list(filter(lambda b: fqdn in b, blobs))

        if backup_index_blobs is None:
            backup_index_blobs = self.list_backup_index_blobs()

        blobs_by_backup = self.group_backup_index_by_backup_and_node(
            backup_index_blobs)

        all_backup_blob_names = get_all_backup_blob_names(backup_index_blobs)

        if len(all_backup_blob_names) == 0:
            logging.info(
                'No backups found in index. Consider running "medusa build-index" if you have some backups'
            )

        # possibly filter out backups only for given fqdn
        if fqdn is not None:
            relevant_backup_names = get_blobs_for_fqdn(all_backup_blob_names,
                                                       fqdn)
        else:
            relevant_backup_names = all_backup_blob_names

        # use the backup names and fqdns from index entries to construct NodeBackup objects
        node_backups = list()
        for backup_index_entry in relevant_backup_names:
            _, _, backup_name, tokenmap_file = backup_index_entry.split('/')
            # the tokenmap file name follows the format 'tokenmap_<fqdn>.json'
            tokenmap_fqdn = self.get_fqdn_from_any_index_blob(tokenmap_file)
            manifest_blob, schema_blob, tokenmap_blob = None, None, None
            started_blob, finished_blob = None, None
            started_timestamp, finished_timestamp = None, None
            # also pre-set these two so they are defined even when the fqdn is missing
            # from the index entries for this backup
            differential_blob, incremental_blob = None, None
            if tokenmap_fqdn in blobs_by_backup[backup_name]:
                manifest_blob = self.lookup_blob(blobs_by_backup, backup_name,
                                                 tokenmap_fqdn, 'manifest')
                schema_blob = self.lookup_blob(blobs_by_backup, backup_name,
                                               tokenmap_fqdn, 'schema')
                tokenmap_blob = self.lookup_blob(blobs_by_backup, backup_name,
                                                 tokenmap_fqdn, 'tokenmap')
                started_blob = self.lookup_blob(blobs_by_backup, backup_name,
                                                tokenmap_fqdn, 'started')
                finished_blob = self.lookup_blob(blobs_by_backup, backup_name,
                                                 tokenmap_fqdn, 'finished')
                differential_blob = self.lookup_blob(blobs_by_backup,
                                                     backup_name,
                                                     tokenmap_fqdn,
                                                     'differential')
                # Should be removed after a while. Here for backwards compatibility.
                incremental_blob = self.lookup_blob(blobs_by_backup,
                                                    backup_name, tokenmap_fqdn,
                                                    'incremental')
                if started_blob is not None:
                    started_timestamp = self.get_timestamp_from_blob_name(
                        started_blob.name)
                if finished_blob is not None:
                    finished_timestamp = self.get_timestamp_from_blob_name(
                        finished_blob.name)

            nb = NodeBackup(
                storage=self,
                fqdn=tokenmap_fqdn,
                name=backup_name,
                manifest_blob=manifest_blob,
                schema_blob=schema_blob,
                tokenmap_blob=tokenmap_blob,
                started_timestamp=started_timestamp,
                started_blob=started_blob,
                finished_timestamp=finished_timestamp,
                finished_blob=finished_blob,
                differential_blob=differential_blob
                if differential_blob is not None else incremental_blob)
            node_backups.append(nb)

        # once we have all the backups, we sort them by their start time. we get oldest ones first
        sorted_node_backups = sorted(
            # before sorting the backups, ensure we can work out at least their start time
            filter(lambda nb: nb.started is not None, node_backups),
            key=lambda nb: nb.started)

        # then, before returning the backups, we pick only the existing ones
        previous_existed = False
        for node_backup in sorted_node_backups:

            # we try to be smart here - once we have seen an existing one, we assume all later ones exist too
            if previous_existed:
                yield node_backup
                continue

            # the idea is to save .exists() calls as they actually go to the storage backend and cost something
            # this is mostly meant to handle the transition period, when backups can expire before the index does
            # as a consequence of running the build-index command

            if node_backup.exists():
                previous_existed = True
                yield node_backup
            else:
                logging.debug(
                    'Backup {} for fqdn {} present only in index'.format(
                        node_backup.name, node_backup.fqdn))
                # if a backup doesn't exist, we should remove its entry from the index too
                try:
                    self.remove_backup_from_index(node_backup)
                except InvalidCredsError:
                    logging.debug(
                        'This account cannot perform the cleanup_storage operation for backup '
                        '{} for fqdn {}, which is present only in the index. '
                        'Ignoring and continuing...'.format(
                            node_backup.name, node_backup.fqdn))
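    # A minimal usage sketch (hypothetical caller, not part of the original code):
    # list_node_backups is also a generator, but it reads from the backup index,
    # making it the cheap way to enumerate backups. The fqdn filter is optional.
    #
    #     for node_backup in storage.list_node_backups(fqdn='cassandra-node-1.example.com'):
    #         print(node_backup.name, node_backup.started)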