Example #1
    def _run_periodic_snapshot_tasks(self, now, tasks):
        tasks_with_snapshot_names = sorted(
            [(task, now.strftime(task.naming_schema)) for task in tasks],
            key=lambda task_with_snapshot_name: (
                # Lexicographically smaller snapshot names should go first
                task_with_snapshot_name[1],
                # Recursive snapshot with same name as non-recursive should go first
                0 if task_with_snapshot_name[0].recursive else 1,
                # Recursive snapshots without exclude should go first
                0 if not task_with_snapshot_name[0].exclude else 1,
            ))

        created_snapshots = set()
        for task, snapshot_name in tasks_with_snapshot_names:
            snapshot = Snapshot(task.dataset, snapshot_name)
            if snapshot in created_snapshots:
                continue

            try:
                create_snapshot(self.local_shell, snapshot, task.recursive,
                                task.exclude)
            except CreateSnapshotError as e:
                logger.warning("Error creating %r: %r", snapshot, e)
            else:
                logger.info("Created %r", snapshot)
                created_snapshots.add(snapshot)

        empty_snapshots = get_empty_snapshots_for_deletion(
            self.local_shell, tasks_with_snapshot_names)
        if empty_snapshots:
            logger.info("Destroying empty snapshots: %r", empty_snapshots)
            destroy_snapshots(self.local_shell, empty_snapshots)
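
The sort key above ensures that when several tasks produce the same snapshot name, a recursive task runs first, so the later non-recursive duplicate is skipped by the created_snapshots check. A minimal runnable sketch of that ordering, with Task as a hypothetical stand-in for the real periodic snapshot task class:

from collections import namedtuple
from datetime import datetime

Task = namedtuple("Task", ["naming_schema", "recursive", "exclude"])  # hypothetical stand-in

now = datetime(2024, 1, 1)
tasks = [Task("auto-%Y-%m-%d", False, []), Task("auto-%Y-%m-%d", True, [])]
ordered = sorted(
    [(task, now.strftime(task.naming_schema)) for task in tasks],
    key=lambda t: (t[1], 0 if t[0].recursive else 1, 0 if not t[0].exclude else 1),
)
assert ordered[0][0].recursive  # the recursive task is snapshotted first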
Example #2
    def _run_remote_retention(self, now: datetime):
        push_replication_tasks = list(
            filter(self._is_push_replication_task,
                   select_by_class(ReplicationTask, self.tasks)))
        local_snapshots_grouped = group_snapshots_by_datasets(
            multilist_snapshots(
                self.local_shell,
                replication_tasks_source_datasets_queries(
                    push_replication_tasks)))
        for transport, replication_tasks in self._transport_for_replication_tasks(
                push_replication_tasks):
            shell = self._get_shell(transport)
            remote_snapshots = multilist_snapshots(
                shell,
                [(replication_task.target_dataset, replication_task.recursive)
                 for replication_task in replication_tasks])
            remote_snapshots_grouped = group_snapshots_by_datasets(
                remote_snapshots)
            owners = [
                ExecutedReplicationTaskSnapshotOwner(now, replication_task,
                                                     local_snapshots_grouped,
                                                     remote_snapshots_grouped)
                for replication_task in replication_tasks
            ]

            snapshots_to_destroy = calculate_snapshots_to_remove(
                owners, remote_snapshots)
            logger.info("Retention on transport %r destroying snapshots: %r",
                        transport, snapshots_to_destroy)
            destroy_snapshots(shell, snapshots_to_destroy)
Example #3
def test_zfs_hold(hold):
    try:
        subprocess.call("zfs destroy -r data/src", shell=True)
        subprocess.call("zfs destroy -r data/dst", shell=True)

        subprocess.check_call("zfs create data/dst", shell=True)
        for snapshot in snapshots:
            subprocess.check_call(
                f"zfs snapshot {snapshot.dataset}@{snapshot.name}", shell=True)
        for i in hold:
            snapshot = snapshots[i]
            subprocess.check_call(
                f"zfs hold keep {snapshot.dataset}@{snapshot.name}",
                shell=True)

        local_shell = LocalShell()
        destroy_snapshots(local_shell, snapshots)

        assert list_snapshots(local_shell, "data/dst",
                              False) == [snapshots[i] for i in hold]
    finally:
        for snapshot in snapshots:
            subprocess.call(
                f"zfs release keep {snapshot.dataset}@{snapshot.name}",
                shell=True)
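
This test depends on a ZFS guarantee: a snapshot with an active hold cannot be destroyed, so destroy_snapshots leaves the held snapshots in place, and the finally block releases the holds during cleanup. A minimal demonstration of that behavior, assuming a snapshot data/dst@snap-1 exists:

import subprocess

subprocess.check_call("zfs hold keep data/dst@snap-1", shell=True)
assert subprocess.call("zfs destroy data/dst@snap-1", shell=True) != 0  # fails: snapshot is held
subprocess.check_call("zfs release keep data/dst@snap-1", shell=True)
subprocess.check_call("zfs destroy data/dst@snap-1", shell=True)  # succeeds after release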
Example #4
def run_replication_steps(step_templates: [ReplicationStepTemplate],
                          observer=None):
    for step_template in step_templates:
        src_snapshots = step_template.src_context.datasets[
            step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(
            step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(
            src_snapshots, dst_snapshots, step_template.replication_task)
        if incremental_base is None and dst_snapshots:
            if step_template.replication_task.allow_from_scratch:
                logger.warning(
                    "No incremental base for replication task %r on dataset %r, destroying all destination "
                    "snapshots", step_template.replication_task.id,
                    step_template.src_dataset)
                destroy_snapshots(step_template.dst_context.shell, [
                    Snapshot(step_template.dst_dataset, name)
                    for name in dst_snapshots
                ])
            else:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed")

        if not snapshots:
            logger.info(
                "No snapshots to send for replication task %r on dataset %r",
                step_template.replication_task.id, step_template.src_dataset)
            continue

        replicate_snapshots(step_template, incremental_base, snapshots,
                            observer)
Example #5
def test__destroy_snapshots__works():
    shell = Mock()

    destroy_snapshots(shell, [Snapshot("data", "snap-1"), Snapshot("data/work", "snap-1"), Snapshot("data", "snap-2")])

    assert shell.exec.call_count == 2
    shell.exec.assert_has_calls([
        call(["zfs", "destroy", "data@snap-1%snap-2"]),
        call(["zfs", "destroy", "data/work@snap-1"])
    ], True)
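
The expected calls show the batching behavior: snapshots of the same dataset are coalesced into a single zfs destroy invocation (here using the dataset@first%last range syntax), while each distinct dataset gets its own call. A sketch of the per-dataset grouping this implies; group_by_dataset and this Snapshot namedtuple are hypothetical stand-ins, not zettarepl's actual code:

from collections import defaultdict, namedtuple

Snapshot = namedtuple("Snapshot", ["dataset", "name"])

def group_by_dataset(snapshots):
    # Collect snapshot names per dataset, preserving order
    grouped = defaultdict(list)
    for snapshot in snapshots:
        grouped[snapshot.dataset].append(snapshot.name)
    return dict(grouped)

assert group_by_dataset([
    Snapshot("data", "snap-1"),
    Snapshot("data/work", "snap-1"),
    Snapshot("data", "snap-2"),
]) == {"data": ["snap-1", "snap-2"], "data/work": ["snap-1"]}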
Example #6
def test__destroy_snapshots__arg_max():
    shell = Mock()

    with patch("zettarepl.snapshot.destroy.ARG_MAX", 20):
        destroy_snapshots(shell, [Snapshot("data", "snap-1"),
                                  Snapshot("data", "snap-2"),
                                  Snapshot("data", "snap-3")])

    assert shell.exec.call_count == 2
    shell.exec.assert_has_calls([
        call(["zfs", "destroy", "data@snap-1,snap-2"]),
        call(["zfs", "destroy", "data@snap-3"])
    ], True)
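
With ARG_MAX patched down to 20, a single comma-separated argument would exceed the command-line length limit, so the destroy is split across two calls. A hedged sketch of such chunking; chunk_destroy_args is a hypothetical function, the real splitting logic lives in zettarepl.snapshot.destroy:

def chunk_destroy_args(dataset, names, arg_max):
    # Pack "dataset@name1,name2,..." arguments, starting a new chunk
    # whenever appending another name would exceed arg_max characters.
    chunks, current = [], f"{dataset}@{names[0]}"
    for name in names[1:]:
        candidate = f"{current},{name}"
        if len(candidate) > arg_max:
            chunks.append(current)
            current = f"{dataset}@{name}"
        else:
            current = candidate
    chunks.append(current)
    return chunks

assert chunk_destroy_args("data", ["snap-1", "snap-2", "snap-3"], 20) == [
    "data@snap-1,snap-2",
    "data@snap-3",
]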
Example #7
    def _run_remote_retention(self, now: datetime):
        push_replication_tasks = list(
            filter(self._is_push_replication_task,
                   select_by_class(ReplicationTask, self.tasks)))
        local_snapshots_grouped = group_snapshots_by_datasets(
            multilist_snapshots(
                self.local_shell,
                replication_tasks_source_datasets_queries(
                    push_replication_tasks)))
        for transport, replication_tasks in self._transport_for_replication_tasks(
                push_replication_tasks):
            shell = self._get_retention_shell(transport)
            remote_snapshots_queries = [
                (replication_task.target_dataset, replication_task.recursive)
                for replication_task in replication_tasks
            ]
            try:
                # Prevent hanging remote from breaking all the replications
                with ShellTimeoutContext(3600):
                    remote_snapshots = multilist_snapshots(
                        shell, remote_snapshots_queries)
            except Exception as e:
                logger.warning(
                    "Remote retention failed on %r: error listing snapshots: %r",
                    transport, e)
                continue
            remote_snapshots_grouped = group_snapshots_by_datasets(
                remote_snapshots)
            owners = [
                ExecutedReplicationTaskSnapshotOwner(now, replication_task,
                                                     local_snapshots_grouped,
                                                     remote_snapshots_grouped)
                for replication_task in replication_tasks
            ]

            snapshots_to_destroy = calculate_snapshots_to_remove(
                owners, remote_snapshots)
            logger.info("Retention on %r destroying snapshots: %r", transport,
                        snapshots_to_destroy)
            try:
                # Prevent hanging remote from breaking all the replications
                with ShellTimeoutContext(3600):
                    destroy_snapshots(shell, snapshots_to_destroy)
            except Exception as e:
                logger.warning(
                    "Remote retention failed on %r: error destroying snapshots: %r",
                    transport, e)
                continue
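
Both the listing and the destroy are wrapped in ShellTimeoutContext and a broad except/continue, so one unreachable or hanging remote costs only its own iteration instead of stalling retention for every transport. A minimal SIGALRM-based sketch of the same timeout idea (Unix, main thread only; ShellTimeoutContext itself is not shown in this snippet and presumably scopes the timeout to shell operations):

import signal
from contextlib import contextmanager

@contextmanager
def shell_timeout(seconds):
    # Hypothetical stand-in for ShellTimeoutContext
    def _raise(signum, frame):
        raise TimeoutError(f"operation timed out after {seconds}s")
    old_handler = signal.signal(signal.SIGALRM, _raise)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)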
Example #8
def test_zfs_clone():
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/dst", shell=True)
    for snapshot in snapshots:
        subprocess.check_call(
            f"zfs snapshot {snapshot.dataset}@{snapshot.name}", shell=True)
    subprocess.check_call(
        f"zfs clone {snapshots[1].dataset}@{snapshots[1].name} data/src",
        shell=True)

    local_shell = LocalShell()
    destroy_snapshots(local_shell, snapshots)

    assert list_snapshots(local_shell, "data/dst", False) == [snapshots[1]]
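
The assertion holds because data/src was cloned from snapshots[1]: ZFS refuses to destroy a snapshot that has dependent clones, so destroy_snapshots removes every other snapshot and leaves that one behind. The underlying behavior, assuming a snapshot data/dst@snap-2 exists:

import subprocess

subprocess.check_call("zfs clone data/dst@snap-2 data/src", shell=True)
assert subprocess.call("zfs destroy data/dst@snap-2", shell=True) != 0  # snapshot has dependent clones
subprocess.check_call("zfs destroy data/src", shell=True)  # remove the clone first
subprocess.check_call("zfs destroy data/dst@snap-2", shell=True)  # now succeeds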
Example #9
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    ignored_roots = set()
    for step_template in step_templates:
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None and dst_snapshots:
            if step_template.replication_task.allow_from_scratch:
                logger.warning("No incremental base for replication task %r on dataset %r, destroying all destination "
                               "snapshots", step_template.replication_task.id, step_template.src_dataset)
                destroy_snapshots(
                    step_template.dst_context.shell,
                    [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                )
            else:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed"
                )

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", step_template.replication_task.id,
                        step_template.src_dataset)
            if not src_snapshots:
                ignored_roots.add(step_template.src_dataset)
            continue

        replicate_snapshots(step_template, incremental_base, snapshots, observer)
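
Compared to Example #4, this version prunes whole subtrees: once a source dataset turns out to have no snapshots, it is added to ignored_roots and none of its descendants are replicated either. A plausible sketch of the is_child helper this relies on (the actual zettarepl implementation may differ):

def is_child(dataset: str, parent: str) -> bool:
    # "data/src/a" is a child of "data/src"; "data/srcx" is not.
    return dataset == parent or dataset.startswith(parent + "/")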
Example #10
    def _run_local_retention(self, now: datetime):
        periodic_snapshot_tasks = select_by_class(PeriodicSnapshotTask,
                                                  self.tasks)
        replication_tasks = select_by_class(ReplicationTask, self.tasks)

        push_replication_tasks_that_can_hold = [
            replication_task for replication_task in replication_tasks
            if replication_task.hold_pending_snapshots
        ]
        pull_replication_tasks = list(
            filter(self._is_pull_replication_task, replication_tasks))

        local_snapshots_queries = []
        local_snapshots_queries.extend([
            (periodic_snapshot_task.dataset, periodic_snapshot_task.recursive)
            for periodic_snapshot_task in periodic_snapshot_tasks
        ])
        local_snapshots_queries.extend(
            replication_tasks_source_datasets_queries(
                push_replication_tasks_that_can_hold))
        local_snapshots_queries.extend([
            (replication_task.target_dataset, replication_task.recursive)
            for replication_task in pull_replication_tasks
        ])
        local_snapshots = multilist_snapshots(self.local_shell,
                                              local_snapshots_queries)
        local_snapshots_grouped = group_snapshots_by_datasets(local_snapshots)

        owners = []
        owners.extend([
            PeriodicSnapshotTaskSnapshotOwner(now, periodic_snapshot_task)
            for periodic_snapshot_task in periodic_snapshot_tasks
        ])

        # These are always only PUSH replication tasks
        for transport, replication_tasks in self._transport_for_replication_tasks(
                push_replication_tasks_that_can_hold):
            shell = self._get_retention_shell(transport)
            owners.extend(
                pending_push_replication_task_snapshot_owners(
                    local_snapshots_grouped, shell, replication_tasks))

        for transport, replication_tasks in self._transport_for_replication_tasks(
                pull_replication_tasks):
            shell = self._get_retention_shell(transport)
            remote_snapshots_queries = replication_tasks_source_datasets_queries(
                replication_tasks)
            try:
                remote_snapshots = multilist_snapshots(
                    shell, remote_snapshots_queries)
            except Exception as e:
                logger.warning(
                    "Local retention failed: error listing snapshots on %r: %r",
                    transport, e)
                return
            remote_snapshots_grouped = group_snapshots_by_datasets(
                remote_snapshots)
            owners.extend([
                executed_pull_replication_task_snapshot_owner(
                    now, replication_task, remote_snapshots_grouped,
                    local_snapshots_grouped)
                for replication_task in replication_tasks
            ])

        snapshots_to_destroy = calculate_snapshots_to_remove(
            owners, local_snapshots)
        logger.info("Retention destroying local snapshots: %r",
                    snapshots_to_destroy)
        destroy_snapshots(self.local_shell, snapshots_to_destroy)
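
All listing here is driven by (dataset, recursive) query tuples assembled from three sources: periodic snapshot task datasets, source datasets of push tasks that hold pending snapshots, and target datasets of pull tasks. The shape multilist_snapshots receives, with hypothetical values for illustration:

local_snapshots_queries = [
    ("tank/vms", True),      # periodic snapshot task: dataset, recursive
    ("tank/work", True),     # push replication source with hold_pending_snapshots
    ("tank/backup", False),  # pull replication target, non-recursive
]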
Example #11
    def _run_periodic_snapshot_tasks(self, now, tasks):
        scheduled_tasks = []
        for task in tasks:
            snapshot_name = get_snapshot_name(now, task.naming_schema)

            try:
                parsed_snapshot_name = parse_snapshot_name(
                    snapshot_name, task.naming_schema)
            except ValueError as e:
                logger.warning(
                    "Unable to parse snapshot name %r with naming schema %r: %s. Skipping task %r",
                    snapshot_name,
                    task.naming_schema,
                    str(e),
                    task,
                )

                notify(
                    self.observer,
                    PeriodicSnapshotTaskError(
                        task.id, "Unable to parse snapshot name %r: %s" % (
                            snapshot_name,
                            str(e),
                        )))
                continue

            scheduled_tasks.append(
                ScheduledPeriodicSnapshotTask(
                    task,
                    snapshot_name,
                    parsed_snapshot_name,
                ))

        scheduled_tasks = sorted(
            scheduled_tasks,
            key=lambda scheduled_task: (
                # Common sorting order
                parsed_snapshot_sort_key(scheduled_task.parsed_snapshot_name),
                # Recursive snapshot with same name as non-recursive should go first
                0 if scheduled_task.task.recursive else 1,
                # Recursive snapshots without exclude should go first
                0 if not scheduled_task.task.exclude else 1,
            ))

        tasks_with_snapshot_names = [(scheduled_task.task,
                                      scheduled_task.snapshot_name)
                                     for scheduled_task in scheduled_tasks]

        created_snapshots = set()
        for task, snapshot_name in tasks_with_snapshot_names:
            snapshot = Snapshot(task.dataset, snapshot_name)
            if snapshot in created_snapshots:
                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))
                continue

            options = notify(self.observer, PeriodicSnapshotTaskStart(task.id))
            try:
                create_snapshot(self.local_shell, snapshot, task.recursive,
                                task.exclude, options.properties)
            except CreateSnapshotError as e:
                logger.warning("Error creating %r: %r", snapshot, e)

                notify(self.observer,
                       PeriodicSnapshotTaskError(task.id, str(e)))
            else:
                logger.info("Created %r", snapshot)
                created_snapshots.add(snapshot)

                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))

        empty_snapshots = get_empty_snapshots_for_deletion(
            self.local_shell, tasks_with_snapshot_names)
        if empty_snapshots:
            logger.info("Destroying empty snapshots: %r", empty_snapshots)
            destroy_snapshots(self.local_shell, empty_snapshots)
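
Compared to Example #1, this version validates each generated snapshot name by parsing it back under its naming schema before scheduling, and reports progress to an observer. Assuming get_snapshot_name and parse_snapshot_name wrap strftime and strptime (an assumption; this snippet does not show their implementation), the round-trip check amounts to:

from datetime import datetime

naming_schema = "auto-%Y-%m-%d_%H-%M"
now = datetime(2024, 1, 1, 12, 0)
name = now.strftime(naming_schema)                    # get_snapshot_name analogue
assert datetime.strptime(name, naming_schema) == now  # parse_snapshot_name analogue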
Example #12
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            if not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True):
                raise ReplicationError(
                    f"Target dataset {step_template.dst_dataset!r} exists and does hot have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        is_immediate_target_dataset = i == 0

        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None:
            if dst_snapshots:
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying all destination "
                        "snapshots", step_template.replication_task.id, step_template.src_dataset,
                    )
                    destroy_snapshots(
                        step_template.dst_context.shell,
                        [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                    )
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed"
                    )
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children

                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            allowed_dst_child_datasets = {
                                get_target_dataset(step_template.replication_task, dataset)
                                for dataset in (
                                    set(step_template.src_context.datasets) -
                                    set(step_template.replication_task.exclude)
                                )
                                if dataset != step_template.src_dataset and is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)

                        ensure_has_no_data(step_template.dst_context.shell, step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", step_template.replication_task.id,
                        step_template.src_dataset)
            if not src_snapshots:
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, encryption))

    for step_template, incremental_base, snapshots, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, encryption, observer)
        handle_readonly(step_template)
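
Note that steps are first collected into plan and only executed afterwards, so validation errors surface before any data is sent. The parent-creation guard works because ZFS dataset names nest like filesystem paths: os.path.dirname yields the parent dataset, and a name without a slash is a pool root that always exists:

import os

assert os.path.dirname("tank/backups/server/data") == "tank/backups/server"
assert os.path.dirname("tank/backups") == "tank"  # no "/" in parent: pool root, never created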