Example #1
def get_task_snapshots(datasets: [str], task: PeriodicSnapshotTask, snapshot_name: str):
    if task.recursive:
        return [
            Snapshot(dataset, snapshot_name)
            for dataset in datasets
            if is_child(dataset, task.dataset) and not should_exclude(dataset, task.exclude)
        ]
    else:
        return [Snapshot(task.dataset, snapshot_name)]
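
The comprehension above leans on two helpers that are not shown in this snippet. A minimal sketch of how they could behave, assuming ZFS dataset names are plain slash-separated paths; this is an illustration, not necessarily the project's actual implementation:

def is_child(dataset: str, parent: str) -> bool:
    # "data/src/work" is a child of "data/src"; a dataset also counts as its own child here
    return dataset == parent or dataset.startswith(parent + "/")


def should_exclude(dataset: str, exclude: [str]) -> bool:
    # Excluded if the dataset is one of the excluded datasets or lives below one of them
    return any(is_child(dataset, excluded) for excluded in exclude)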
Example #2
def test__destroy_snapshots__works():
    shell = Mock()

    destroy_snapshots(shell, [Snapshot("data", "snap-1"), Snapshot("data/work", "snap-1"), Snapshot("data", "snap-2")])

    assert shell.exec.call_count == 2
    shell.exec.assert_has_calls([
        call(["zfs", "destroy", "data@snap-1%snap-2"]),
        call(["zfs", "destroy", "data/work@snap-1"])
    ], True)
Example #3
def test__destroy_snapshots__arg_max():
    shell = Mock()

    with patch("zettarepl.snapshot.destroy.ARG_MAX", 20):
        destroy_snapshots(shell, [Snapshot("data", "snap-1"),
                                  Snapshot("data", "snap-2"),
                                  Snapshot("data", "snap-3")])

    assert shell.exec.call_count == 2
    shell.exec.assert_has_calls([
        call(["zfs", "destroy", "data@snap-1,snap-2"]),
        call(["zfs", "destroy", "data@snap-3"])
    ], True)
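
Examples #2 and #3 together constrain how destroy_snapshots could batch its zfs destroy calls: one command per dataset, several snapshot names joined into a single argument, split into multiple commands once that argument would exceed ARG_MAX. The sketch below reproduces the comma-joined batching asserted in Example #3, reusing group_snapshots_by_datasets from Example #14; it deliberately skips the first%last range syntax seen in Example #2 and is not the project's actual implementation:

def destroy_snapshots(shell, snapshots: [Snapshot]):
    for dataset, names in group_snapshots_by_datasets(snapshots).items():
        batch = []
        for name in names:
            candidate = batch + [name]
            if batch and len(f"{dataset}@{','.join(candidate)}") > ARG_MAX:
                # Flush the current batch before the argument grows past the length limit
                shell.exec(["zfs", "destroy", f"{dataset}@{','.join(batch)}"])
                batch = [name]
            else:
                batch = candidate
        if batch:
            shell.exec(["zfs", "destroy", f"{dataset}@{','.join(batch)}"])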
Example #4
def test_source_retention_multiple_sources():
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs create data/src/a", shell=True)
    subprocess.check_call("zfs create data/src/b", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_02-00", shell=True)

    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs create data/dst/a", shell=True)
    subprocess.check_call("zfs create data/dst/b", shell=True)
    subprocess.check_call("zfs snapshot -r data/dst@2018-10-01_00-00", shell=True)
    subprocess.check_call("zfs snapshot -r data/dst@2018-10-01_01-00", shell=True)
    subprocess.check_call("zfs snapshot -r data/dst@2018-10-01_02-00", shell=True)

    definition = Definition.from_data(yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: [data/src/a, data/src/b]
            target-dataset: data/dst
            recursive: false
            periodic-snapshot-tasks:
              - src
            auto: true
            retention-policy: source
            hold-pending-snapshots: true
    """)))

    local_shell = LocalShell()
    zettarepl = Zettarepl(Mock(), local_shell)
    zettarepl.set_tasks(definition.tasks)
    zettarepl._run_remote_retention(datetime(2018, 10, 1, 3, 0))

    assert list_snapshots(local_shell, "data/dst/a", False) == [Snapshot("data/dst/a", "2018-10-01_02-00")]
    assert list_snapshots(local_shell, "data/dst/b", False) == [Snapshot("data/dst/b", "2018-10-01_02-00")]
Example #5
def test__create_snapshot__zfscli_no_properties():
    shell = Mock()

    create_snapshot(shell, Snapshot("data/src", "snap-1"), True, [], {})

    shell.exec.assert_called_once_with(
        ["zfs", "snapshot", "-r", "data/src@snap-1"])
Example #6
    def _run_periodic_snapshot_tasks(self, now, tasks):
        tasks_with_snapshot_names = sorted(
            [(task, now.strftime(task.naming_schema)) for task in tasks],
            key=lambda task_with_snapshot_name: (
                # Lexicographically smaller snapshot names should go first
                task_with_snapshot_name[1],
                # Recursive snapshot with same name as non-recursive should go first
                0 if task_with_snapshot_name[0].recursive else 1,
                # Recursive snapshots without exclude should go first
                0 if not task_with_snapshot_name[0].exclude else 1,
            ))

        created_snapshots = set()
        for task, snapshot_name in tasks_with_snapshot_names:
            snapshot = Snapshot(task.dataset, snapshot_name)
            if snapshot in created_snapshots:
                continue

            try:
                create_snapshot(self.local_shell, snapshot, task.recursive,
                                task.exclude)
            except CreateSnapshotError as e:
                logger.warning("Error creating %r: %r", snapshot, e)
            else:
                logger.info("Created %r", snapshot)
                created_snapshots.add(snapshot)

        empty_snapshots = get_empty_snapshots_for_deletion(
            self.local_shell, tasks_with_snapshot_names)
        if empty_snapshots:
            logger.info("Destroying empty snapshots: %r", empty_snapshots)
            destroy_snapshots(self.local_shell, empty_snapshots)
Example #7
def run_replication_steps(step_templates: [ReplicationStepTemplate],
                          observer=None):
    for step_template in step_templates:
        src_snapshots = step_template.src_context.datasets[
            step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(
            step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(
            src_snapshots, dst_snapshots, step_template.replication_task)
        if incremental_base is None and dst_snapshots:
            if step_template.replication_task.allow_from_scratch:
                logger.warning(
                    "No incremental base for replication task %r on dataset %r, destroying all destination "
                    "snapshots", step_template.replication_task.id,
                    step_template.src_dataset)
                destroy_snapshots(step_template.dst_context.shell, [
                    Snapshot(step_template.dst_dataset, name)
                    for name in dst_snapshots
                ])
            else:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed")

        if not snapshots:
            logger.info(
                "No snapshots to send for replication task %r on dataset %r",
                step_template.replication_task.id, step_template.src_dataset)
            continue

        replicate_snapshots(step_template, incremental_base, snapshots,
                            observer)
Example #8
def test_does_not_remove_the_last_snapshot_left():
    subprocess.call("zfs destroy -r data/src", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot data/src@2020-05-07_00-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2020-05-23_00-00", shell=True)

    data = yaml.safe_load(
        textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: false
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "*"
              hour: "*"
              day-of-month: "*"
              month: "*"
              day-of-week: "*"
            lifetime: P30D
    """))
    definition = Definition.from_data(data)

    local_shell = LocalShell()
    zettarepl = Zettarepl(Mock(), local_shell)
    zettarepl.set_tasks(definition.tasks)
    zettarepl._run_local_retention(datetime(2020, 6, 25, 0, 0))

    assert list_snapshots(local_shell, "data/src",
                          False) == [Snapshot("data/src", "2020-05-23_00-00")]
Example #9
def test__run_periodic_snapshot_tasks__alphabetical():
    with patch("zettarepl.zettarepl.create_snapshot") as create_snapshot:
        with patch("zettarepl.zettarepl.get_empty_snapshots_for_deletion", Mock(return_value=[])):
            zettarepl = Zettarepl(Mock(), Mock())
            zettarepl._run_periodic_snapshot_tasks(
                datetime(2018, 9, 1, 15, 11),
                [
                    Mock(dataset="data", recursive=False, naming_schema="snap-%Y-%m-%d_%H-%M-2d"),
                    Mock(dataset="data", recursive=False, naming_schema="snap-%Y-%m-%d_%H-%M-1w"),
                ]
            )

            assert create_snapshot.call_count == 2
            create_snapshot.assert_has_calls([
                call(ANY, Snapshot("data", "snap-2018-09-01_15-11-1w"), False, ANY),
                call(ANY, Snapshot("data", "snap-2018-09-01_15-11-2d"), False, ANY),
            ])
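
The expected call order follows directly from the sort key in Example #6, which orders tasks by the generated snapshot name before anything else; the two names only differ in their suffix, so the -1w task sorts first:

# Lexicographic comparison of the two generated names decides the order
assert "snap-2018-09-01_15-11-1w" < "snap-2018-09-01_15-11-2d"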
Example #10
def test__create_snapshot__zcp_ok():
    shell = Mock()
    shell.exec.return_value = "Channel program fully executed with no return value."

    create_snapshot(shell, Snapshot("data/src", "snap-1"), True, ["data/src/garbage", "data/src/temp"])

    shell.exec.assert_called_once_with(["zfs", "program", "data", ANY, "data/src", "snap-1",
                                        "data/src/garbage", "data/src/temp"])
Example #11
def test__create_snapshot__zfscli_properties():
    shell = Mock()

    create_snapshot(shell, Snapshot("data/src", "snap-1"), True, [],
                    {"freenas:vmsynced": "Y"})

    shell.exec.assert_called_once_with([
        "zfs", "snapshot", "-r", "-o", "freenas:vmsynced=Y", "data/src@snap-1"
    ])
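
Taken together, Examples #5 and #11 pin down the command line that the plain-CLI path of create_snapshot has to produce. A minimal sketch satisfying both assertions (the function name here is made up, Snapshot is assumed to expose dataset and name attributes, and the real create_snapshot additionally has a channel-program path for the exclude case shown in Example #10):

def create_snapshot_cli(shell, snapshot: Snapshot, recursive: bool, properties: dict):
    args = ["zfs", "snapshot"]
    if recursive:
        args.append("-r")
    for prop, value in properties.items():
        args.extend(["-o", f"{prop}={value}"])
    args.append(f"{snapshot.dataset}@{snapshot.name}")
    shell.exec(args)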
Example #12
def test__calculate_snapshots_to_remove():
    assert calculate_snapshots_to_remove([
        PeriodicSnapshotTaskSnapshotOwner(
            datetime(2019, 5, 30, 21, 52),
            Mock(dataset="dst/work",
                 recursive=False,
                 exclude=[],
                 lifetime=timedelta(days=14),
                 naming_schema="auto-%Y-%m-%d_%H-%M")),
        PeriodicSnapshotTaskSnapshotOwner(
            datetime(2019, 5, 30, 21, 52),
            Mock(dataset="dst/work",
                 recursive=False,
                 exclude=[],
                 lifetime=timedelta(hours=1),
                 naming_schema="snap%d%m%Y%H%M")),
    ], [Snapshot("dst/work", "snap300520191856")]) == [
        Snapshot("dst/work", "snap300520191856")
    ]
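
A quick stdlib check of why this is the expected result: the snapshot name parses only under the second owner's naming schema, and that owner's one-hour lifetime had already expired at the reference time, so the snapshot is eligible for removal:

from datetime import datetime, timedelta

created = datetime.strptime("snap300520191856", "snap%d%m%Y%H%M")  # 2019-05-30 18:56
assert created + timedelta(hours=1) < datetime(2019, 5, 30, 21, 52)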
Example #13
def test__run_periodic_snapshot_tasks__recursive():
    with patch("zettarepl.zettarepl.create_snapshot") as create_snapshot:
        with patch("zettarepl.zettarepl.get_empty_snapshots_for_deletion", Mock(return_value=[])):
            zettarepl = Zettarepl(Mock(), Mock())
            zettarepl._run_periodic_snapshot_tasks(
                datetime(2018, 9, 1, 15, 11),
                [
                    Mock(dataset="data", recursive=False, naming_schema="snap-%Y-%m-%d_%H-%M"),
                    Mock(dataset="data", recursive=True, naming_schema="snap-%Y-%m-%d_%H-%M"),
                ]
            )

            create_snapshot.assert_called_once_with(ANY, Snapshot("data", "snap-2018-09-01_15-11"), True, ANY)
Example #14
def calculate_snapshots_to_remove(owners: [SnapshotOwner],
                                  snapshots: [Snapshot]):
    result = []
    for dataset, dataset_snapshots in group_snapshots_by_datasets(
            snapshots).items():
        dataset_owners = [
            owner for owner in owners if owner.owns_dataset(dataset)
        ]
        result.extend([
            Snapshot(dataset, snapshot)
            for snapshot in calculate_dataset_snapshots_to_remove(
                dataset_owners, dataset, dataset_snapshots)
        ])
    return result
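
calculate_snapshots_to_remove (and the destroy_snapshots sketch earlier) relies on group_snapshots_by_datasets. Judging from how its result is consumed, it plausibly maps each dataset to the plain snapshot names belonging to it; a minimal sketch, again assuming Snapshot carries dataset and name fields:

from collections import defaultdict


def group_snapshots_by_datasets(snapshots: [Snapshot]) -> dict:
    result = defaultdict(list)
    for snapshot in snapshots:
        # Keyed by dataset; values keep the original order of the snapshot names
        result[snapshot.dataset].append(snapshot.name)
    return dict(result)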
Example #15
def test__create_snapshot__zcp_errors():
    shell = Mock()
    shell.exec.side_effect = ExecException(1, textwrap.dedent("""\
        Channel program execution failed:
        [string "channel program"]:44: snapshot=data/src/home@snap-1 error=17, snapshot=data/src/work@snap-1 error=17
        stack traceback:
            [C]: in function 'error'
            [string "channel program"]:44: in main chunk
    """))

    with pytest.raises(CreateSnapshotError) as e:
        create_snapshot(shell, Snapshot("data/src", "snap-1"), True, ["data/src/garbage"])

    assert e.value.args[0] == [
        ("data/src/home@snap-1", "File exists"),
        ("data/src/work@snap-1", "File exists"),
    ]
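
The expected tuples translate the numeric error=17 codes from the channel-program output into human-readable messages. errno 17 is EEXIST, so a plausible (purely illustrative, not taken from the project) mapping is os.strerror:

import os


def format_zcp_error(snapshot_name: str, code: int):
    # os.strerror(17) returns "File exists", matching the assertion above
    return snapshot_name, os.strerror(code)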
Example #16
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    ignored_roots = set()
    for step_template in step_templates:
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None and dst_snapshots:
            if step_template.replication_task.allow_from_scratch:
                logger.warning("No incremental base for replication task %r on dataset %r, destroying all destination "
                               "snapshots", step_template.replication_task.id, step_template.src_dataset)
                destroy_snapshots(
                    step_template.dst_context.shell,
                    [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                )
            else:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed"
                )

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", step_template.replication_task.id,
                        step_template.src_dataset)
            if not src_snapshots:
                ignored_roots.add(step_template.src_dataset)
            continue

        replicate_snapshots(step_template, incremental_base, snapshots, observer)
Example #17
    def __exit__(self, exc_type, exc_val, exc_tb):
        if (self.replication_process.properties and isinstance(
                exc_val, ExecException
        ) and exc_val.stdout.endswith(
                f"cannot mount '{self.replication_process.target_dataset}': mountpoint or dataset is busy\n"
        )):
            if self.replication_process.direction == ReplicationDirection.PUSH:
                dst_shell = self.replication_process.remote_shell
            else:
                dst_shell = self.replication_process.local_shell

            try:
                snapshots = list_snapshots(
                    dst_shell, self.replication_process.target_dataset, False)
            except Exception as e:
                logger.warning(
                    "Caught 'mountpoint or dataset is busy' and was not able to list snapshots on destination side: "
                    "%r. Assuming replication failure.", e)
                return

            snapshot = Snapshot(self.replication_process.target_dataset,
                                self.replication_process.snapshot)
            if snapshot not in snapshots:
                logger.warning(
                    "Caught 'mountpoint or dataset is busy' and %r does not exist on destination side. "
                    "Assuming replication failure.",
                    snapshot,
                )
                return

            # The snapshot was transferred successfully; we just were not able to mount the dataset on the
            # specified mountpoint
            logger.info(
                "Caught 'mountpoint or dataset is busy' but %r is present on remote side. "
                "Assuming replication success.",
                snapshot,
            )
            return True
Example #18
import subprocess
import textwrap
from unittest.mock import Mock

import pytest
import yaml

from zettarepl.definition.definition import Definition
from zettarepl.snapshot.list import list_snapshots
from zettarepl.snapshot.snapshot import Snapshot
from zettarepl.transport.local import LocalShell
from zettarepl.zettarepl import Zettarepl


@pytest.mark.parametrize("hold_pending_snapshots,remains", [
    (True, [
        Snapshot("data/src", "2018-10-01_01-00"),
        Snapshot("data/src", "2018-10-01_02-00"),
        Snapshot("data/src", "2018-10-01_03-00")
    ]),
    (False, [
        Snapshot("data/src", "2018-10-01_02-00"),
        Snapshot("data/src", "2018-10-01_03-00")
    ]),
])
def test_hold_pending_snapshots(hold_pending_snapshots, remains):
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_00-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)
Example #19
    def _run_periodic_snapshot_tasks(self, now, tasks):
        scheduled_tasks = []
        for task in tasks:
            snapshot_name = get_snapshot_name(now, task.naming_schema)

            try:
                parsed_snapshot_name = parse_snapshot_name(
                    snapshot_name, task.naming_schema)
            except ValueError as e:
                logger.warning(
                    "Unable to parse snapshot name %r with naming schema %r: %s. Skipping task %r",
                    snapshot_name,
                    task.naming_schema,
                    str(e),
                    task,
                )

                notify(
                    self.observer,
                    PeriodicSnapshotTaskError(
                        task.id, "Unable to parse snapshot name %r: %s" % (
                            snapshot_name,
                            str(e),
                        )))
                continue

            scheduled_tasks.append(
                ScheduledPeriodicSnapshotTask(
                    task,
                    snapshot_name,
                    parsed_snapshot_name,
                ))

        scheduled_tasks = sorted(
            scheduled_tasks,
            key=lambda scheduled_task: (
                # Common sorting order
                parsed_snapshot_sort_key(scheduled_task.parsed_snapshot_name),
                # Recursive snapshot with same name as non-recursive should go first
                0 if scheduled_task.task.recursive else 1,
                # Recursive snapshots without exclude should go first
                0 if not scheduled_task.task.exclude else 1,
            ))

        tasks_with_snapshot_names = [(scheduled_task.task,
                                      scheduled_task.snapshot_name)
                                     for scheduled_task in scheduled_tasks]

        created_snapshots = set()
        for task, snapshot_name in tasks_with_snapshot_names:
            snapshot = Snapshot(task.dataset, snapshot_name)
            if snapshot in created_snapshots:
                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))
                continue

            options = notify(self.observer, PeriodicSnapshotTaskStart(task.id))
            try:
                create_snapshot(self.local_shell, snapshot, task.recursive,
                                task.exclude, options.properties)
            except CreateSnapshotError as e:
                logger.warning("Error creating %r: %r", snapshot, e)

                notify(self.observer,
                       PeriodicSnapshotTaskError(task.id, str(e)))
            else:
                logger.info("Created %r", snapshot)
                created_snapshots.add(snapshot)

                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))

        empty_snapshots = get_empty_snapshots_for_deletion(
            self.local_shell, tasks_with_snapshot_names)
        if empty_snapshots:
            logger.info("Destroying empty snapshots: %r", empty_snapshots)
            destroy_snapshots(self.local_shell, empty_snapshots)
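
Unlike Example #6, which builds the snapshot name inline with now.strftime(task.naming_schema), this version goes through get_snapshot_name and parse_snapshot_name. A sketch of the assumed strftime/strptime round trip and the ValueError contract handled above; in the real code the parsed result is fed to parsed_snapshot_sort_key, so it is likely a richer object than a bare datetime:

from datetime import datetime


def get_snapshot_name(now: datetime, naming_schema: str) -> str:
    return now.strftime(naming_schema)


def parse_snapshot_name(name: str, naming_schema: str) -> datetime:
    # Raises ValueError when the name does not match the schema, which is
    # exactly what the except ValueError branch above handles
    return datetime.strptime(name, naming_schema)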
Example #20
        (Mock(dataset="data/src", recursive=True, exclude=[],
              allow_empty=True), "snap-1"),
        (Mock(dataset="data/src/work",
              recursive=False,
              exclude=[],
              allow_empty=False), "snap-1"),
    ], []),
    (["data/src", "data/src/garbage", "data/src/work"], [
        (Mock(
            dataset="data/src", recursive=True, exclude=[],
            allow_empty=False), "snap-1"),
        (Mock(dataset="data/src",
              recursive=True,
              exclude=["data/src/garbage"],
              allow_empty=True), "snap-1"),
    ], [Snapshot("data/src/garbage", "snap-1")]),
])
def test__get_empty_snapshots_for_deletion__1(datasets,
                                              tasks_with_snapshot_names,
                                              result):
    with patch("zettarepl.snapshot.empty.list_datasets",
               Mock(return_value=datasets)):
        with patch("zettarepl.snapshot.empty.is_empty_snapshot",
                   Mock(return_value=True)):
            assert get_empty_snapshots_for_deletion(
                Mock(), tasks_with_snapshot_names) == result


@pytest.mark.parametrize("all_datasets,task,task_datasets", [
    (
        ["data/src", "data/src/work", "data/dst"],
Example #21
def test_hold_pending_snapshots__does_not_delete_orphan_snapshots():
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_00-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_02-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_03-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-02_00-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-03_00-00", shell=True)

    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs snapshot data/dst@2018-10-01_00-00", shell=True)
    subprocess.check_call("zfs snapshot data/dst@2018-10-01_01-00", shell=True)
    subprocess.check_call("zfs snapshot data/dst@2018-10-01_02-00", shell=True)
    subprocess.check_call("zfs snapshot data/dst@2018-10-01_03-00", shell=True)
    subprocess.check_call("zfs snapshot data/dst@2018-10-02_00-00", shell=True)
    subprocess.check_call("zfs snapshot data/dst@2018-10-03_00-00", shell=True)

    definition = Definition.from_data(
        yaml.safe_load(
            textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT48H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"
              hour: "0"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            periodic-snapshot-tasks:
              - src
            auto: true
            retention-policy: source
            hold-pending-snapshots: true
    """)))

    local_shell = LocalShell()
    zettarepl = Zettarepl(Mock(), local_shell)
    zettarepl.set_tasks(definition.tasks)
    zettarepl._run_local_retention(datetime(2018, 10, 4, 0, 0))

    assert list_snapshots(local_shell, "data/src", False) == [
        Snapshot("data/src", "2018-10-01_01-00"),
        Snapshot("data/src", "2018-10-01_02-00"),
        Snapshot("data/src", "2018-10-01_03-00"),
        Snapshot("data/src", "2018-10-02_00-00"),
        Snapshot("data/src", "2018-10-03_00-00"),
    ]
Example #22
from datetime import datetime
from unittest.mock import Mock, patch

import pytest
import yaml

from zettarepl.definition.definition import Definition
from zettarepl.snapshot.list import list_snapshots
from zettarepl.snapshot.snapshot import Snapshot
from zettarepl.transport.local import LocalShell
from zettarepl.zettarepl import Zettarepl


@pytest.mark.parametrize("snapshots__removal_dates__result", [
    # Does not remove snapshot that is scheduled to be removed later
    (
        [Snapshot("data/src", "2021-02-19-00-00"), Snapshot("data/src", "2021-04-19-00-00")],
        {"data/src@2021-02-19-00-00": datetime(2021, 5, 1, 0, 0)},
        [0, 1],
    ),
    # Does not remove snapshot that was scheduled to be removed later but is kept by someone else
    (
        [Snapshot("data/src", "2021-04-12-00-00"), Snapshot("data/src", "2021-04-19-00-00")],
        {"data/src@2021-02-19-00-00": datetime(2021, 4, 15, 0, 0)},
        [0, 1],
    ),
    # Removes snapshot
    (
        [Snapshot("data/src", "2021-02-19-00-00"), Snapshot("data/src", "2021-04-19-00-00")],
        {"data/src@2021-02-19-00-00": datetime(2021, 4, 1, 0, 0)},
        [1],
    ),
Example #23
def test_dst(naming_schemas):
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)

    definition = yaml.safe_load(
        textwrap.dedent(f"""\
        timezone: "Europe/Moscow"

        periodic-snapshot-tasks:
          task1:
            dataset: data/src
            recursive: true
            naming-schema: "{naming_schemas[0]}"
            schedule:
              minute: "*"
              hour: "*"
              day-of-month: "*"
              month: "*"
              day-of-week: "*"
          task2:
            dataset: data/src
            recursive: true
            naming-schema: "{naming_schemas[1]}"
            schedule:
              minute: "*"
              hour: "*"
              day-of-month: "*"
              month: "*"
              day-of-week: "*"
    """))

    run_periodic_snapshot_test(
        definition,
        datetime(2010, 10, 30, 22, 0, 0,
                 tzinfo=pytz.UTC).astimezone(pytz.timezone("Europe/Moscow")))

    local_shell = LocalShell()
    assert list_snapshots(local_shell, "data/src", False) == [
        Snapshot("data/src", "auto-2010-10-31-02-00"),
        Snapshot("data/src", "auto-2010-10-31-02-00:0400"),
    ]

    run_periodic_snapshot_test(
        definition,
        datetime(2010, 10, 30, 23, 0, 0,
                 tzinfo=pytz.UTC).astimezone(pytz.timezone("Europe/Moscow")),
        False,
    )

    assert list_snapshots(local_shell, "data/src", False) == [
        Snapshot("data/src", "auto-2010-10-31-02-00"),
        Snapshot("data/src", "auto-2010-10-31-02-00:0300"),
        Snapshot("data/src", "auto-2010-10-31-02-00:0400"),
    ]

    definition = yaml.safe_load(
        textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            also-include-naming-schema:
            - "auto-%Y-%m-%d-%H-%M"
            - "auto-%Y-%m-%d-%H-%M%z"
            auto: false
            retention-policy: none
            retries: 1
    """))
    run_replication_test(definition)

    assert list_snapshots(local_shell, "data/dst", False) == [
        Snapshot("data/dst", "auto-2010-10-31-02-00"),
        Snapshot("data/dst", "auto-2010-10-31-02-00:0300"),
        Snapshot("data/dst", "auto-2010-10-31-02-00:0400"),
    ]
Example #24
    def __exit__(self, exc_type, exc_val, exc_tb):
        m = {}
        valid_errors = ("failed to create mountpoint",
                        "mountpoint or dataset is busy")
        valid_pylibzfs_errors = ("failed to create mountpoint", )
        if (isinstance(exc_val, ExecException) and (
                # Regular zfs CLI
            (
                re_search_to(
                    m,
                    f"cannot mount '(?P<dataset>.+)': (?P<error>({'|'.join(valid_errors)}))\n",
                    exc_val.stdout,
                ) and
                (m["dataset"] == self.replication_process.target_dataset or
                 (m["error"] == "failed to create mountpoint" and m["dataset"].
                  endswith(f"/{self.replication_process.target_dataset}")))
                # py-libzfs
            ) or (re_search_to(
                m,
                f"(?P<error>({'|'.join(valid_pylibzfs_errors)}))\n",
                exc_val.stdout,
            ))) and (self.replication_process.properties if m["error"]
                     == "mountpoint or dataset is busy" else True)):
            if self.replication_process.direction == ReplicationDirection.PUSH:
                dst_shell = self.replication_process.remote_shell
            else:
                dst_shell = self.replication_process.local_shell

            try:
                snapshots = list_snapshots(
                    dst_shell, self.replication_process.target_dataset, False)
            except Exception as e:
                logger.warning(
                    "Caught %r and was not able to list snapshots on destination side: %r. Assuming replication "
                    "failure.", m["error"], e)
                return

            snapshot = Snapshot(self.replication_process.target_dataset,
                                self.replication_process.snapshot)
            if snapshot not in snapshots:
                logger.warning(
                    "Caught %r and %r does not exist on destination side. Assuming replication failure.",
                    m["error"],
                    snapshot,
                )
                return

            # The snapshot was transferred successfully; we just were not able to mount the dataset on the
            # specified mountpoint
            logger.info(
                "Caught %r but %r is present on remote side. Assuming replication success.",
                m["error"],
                snapshot,
            )
            return True

        if (self.replication_process.incremental_base
                and isinstance(exc_val, ExecException)):
            m = re.search(
                r"could not send (?P<snapshot>.+):\s*"
                r"incremental source \((?P<incremental_base>.+)\) is not earlier than it",
                exc_val.stdout)
            if m:
                text = textwrap.dedent(f"""\
                    Replication cannot continue because existing snapshot
                    {m.group('incremental_base')} is newer than
                    {m.group('snapshot')}, but has an older date
                    in the snapshot name. To resolve the error, rename
                    {m.group('snapshot')} with a date that is older than
                    {m.group('incremental_base')} or delete snapshot
                    {m.group('snapshot')} from both the source and destination.
                """)
                exc_val.stdout = exc_val.stdout.replace(
                    m.group(0),
                    m.group(0) + f"\n{text.rstrip()}")
                return

        if (isinstance(exc_val, ExecException) and
            (re.search(r"cannot send .+:\s*signal received", exc_val.stdout) or
             "cannot receive new filesystem stream: checksum mismatch or incomplete stream"
             in exc_val.stdout)):
            raise RecoverableReplicationError(str(exc_val)) from None
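
This handler (and the extended variant in Example #26) uses a re_search_to helper that is not shown. From the way the m dict is read afterwards, it plausibly runs re.search and copies the named groups into the caller-supplied dict; a sketch under that assumption:

import re


def re_search_to(m: dict, pattern: str, string: str):
    match = re.search(pattern, string)
    if match:
        # Expose the named groups to the caller through the mutable dict
        m.update(match.groupdict())
    return match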
Example #25
    def create_recursive_snapshot_with_exclude(self, dataset, snapshot,
                                               exclude):
        create_snapshot(LocalShell(), Snapshot(dataset, snapshot), True,
                        exclude, {})
Example #26
    def __exit__(self, exc_type, exc_val, exc_tb):
        m = {}
        valid_errors = ("failed to create mountpoint.*",
                        "mountpoint or dataset is busy")
        valid_pylibzfs_errors = ("failed to create mountpoint.*", )
        if (isinstance(exc_val, ExecException) and (
                # Regular zfs CLI
            (
                re_search_to(
                    m,
                    f"cannot mount '(?P<dataset>.+)': (?P<error>({'|'.join(valid_errors)}))\n",
                    exc_val.stdout,
                ) and
                (m["dataset"] == self.replication_process.target_dataset or
                 (m["error"].startswith("failed to create mountpoint")
                  and m["dataset"].endswith(
                      f"/{self.replication_process.target_dataset}")))
                # py-libzfs
            ) or (re_search_to(
                m,
                f"(?P<error>({'|'.join(valid_pylibzfs_errors)}))\n",
                exc_val.stdout,
            ))) and (self.replication_process.properties if m["error"]
                     == "mountpoint or dataset is busy" else True)):
            if self.replication_process.direction == ReplicationDirection.PUSH:
                dst_shell = self.replication_process.remote_shell
            else:
                dst_shell = self.replication_process.local_shell

            try:
                snapshots = list_snapshots(
                    dst_shell, self.replication_process.target_dataset, False)
            except Exception as e:
                logger.warning(
                    "Caught %r and was not able to list snapshots on destination side: %r. Assuming replication "
                    "failure.", m["error"], e)
                return

            snapshot = Snapshot(self.replication_process.target_dataset,
                                self.replication_process.snapshot)
            if snapshot not in snapshots:
                logger.warning(
                    "Caught %r and %r does not exist on destination side. Assuming replication failure.",
                    m["error"],
                    snapshot,
                )
                return

            # The snapshot was transferred successfully; we just were not able to mount the dataset on the
            # specified mountpoint
            logger.info(
                "Caught %r but %r is present on remote side. Assuming replication success.",
                m["error"],
                snapshot,
            )
            return True

        if (self.replication_process.incremental_base
                and isinstance(exc_val, ExecException)):
            match = None
            snapshot = None
            incremental_base = None

            # OpenZFS
            m = re.search(
                r"could not send (?P<snapshot>.+):\s*"
                r"incremental source \((?P<incremental_base>.+)\) is not earlier than it",
                exc_val.stdout)
            if m:
                match = m.group(0)
                snapshot = m.group("snapshot")
                incremental_base = m.group("incremental_base")

            # ZoL
            m = re.search(
                r"warning: cannot send (?P<snapshot>.+): not an earlier snapshot from the same fs",
                exc_val.stdout)
            if m:
                match = m.group(0)
                snapshot = m.group("snapshot").strip("'")
                incremental_base = self.replication_process.incremental_base

            if match is not None:
                text = textwrap.dedent(f"""\
                    Replication cannot continue because existing snapshot
                    {incremental_base} is newer than
                    {snapshot}, but has an older date
                    in the snapshot name. To resolve the error, rename
                    {snapshot} with a date that is older than
                    {incremental_base} or delete snapshot
                    {snapshot} from both the source and destination.
                """)
                exc_val.stdout = exc_val.stdout.replace(
                    match, match + f"\n{text.rstrip()}")
                return

        if (isinstance(exc_val, ExecException) and
            (re.search(r"cannot send .+:\s*signal received", exc_val.stdout) or
             "cannot receive new filesystem stream: checksum mismatch or incomplete stream"
             in exc_val.stdout)):
            raise RecoverableReplicationError(str(exc_val)) from None

        if (isinstance(exc_val, ExecException) and (
                # OpenZFS
                re.search(r"cannot send .+: snapshot .+ does not exist",
                          exc_val.stdout) or
                # ZoL
                re.search(r"cannot open '.+@.+': dataset does not exist",
                          exc_val.stdout))):
            raise RecoverableReplicationError(str(exc_val)) from None

        if (isinstance(
                exc_val, ExecException
        ) and "zfs receive -F cannot be used to destroy an encrypted filesystem"
                in exc_val.stdout.strip()):
            if self.replication_process.raw:
                raise ReplicationError(
                    f"Unable to send encrypted dataset {self.replication_process.source_dataset!r} to existing "
                    f"unencrypted or unrelated dataset {self.replication_process.target_dataset!r}"
                ) from None
            else:
                raise ReplicationError(
                    f"Unable to send dataset {self.replication_process.source_dataset!r} to existing unrelated "
                    f"encrypted dataset {self.replication_process.target_dataset!r}"
                ) from None

        if (isinstance(exc_val, ExecException)
                and re.search(r"cannot mount '.+': Insufficient privileges",
                              exc_val.stdout)):
            raise ReplicationError(
                f"{exc_val.stdout.rstrip('.')}. Please make sure replication user has write permissions to its "
                f"parent dataset") from None
Example #27
import subprocess
import textwrap
from unittest.mock import Mock

import pytest
import yaml

from zettarepl.definition.definition import Definition
from zettarepl.snapshot.list import list_snapshots
from zettarepl.snapshot.snapshot import Snapshot
from zettarepl.transport.local import LocalShell
from zettarepl.zettarepl import Zettarepl


@pytest.mark.parametrize("retention_policy,remains", [
    ({"retention-policy": "source"}, [
        Snapshot("data/dst", "2018-10-01_01-00"),
        Snapshot("data/dst", "2018-10-01_02-00"),
        Snapshot("data/dst", "2018-10-01_03-00")
    ]),
    ({"retention-policy": "custom", "lifetime": "PT1H"}, [
        Snapshot("data/dst", "2018-10-01_02-00"),
        Snapshot("data/dst", "2018-10-01_03-00")
    ]),
    ({"retention-policy": "none"}, [
        Snapshot("data/dst", "2018-10-01_00-00"),
        Snapshot("data/dst", "2018-10-01_01-00"),
        Snapshot("data/dst", "2018-10-01_02-00"),
        Snapshot("data/dst", "2018-10-01_03-00")
    ]),
])
def test_push_remote_retention(retention_policy, remains):
Example #28
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            if not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True):
                raise ReplicationError(
                    f"Target dataset {step_template.dst_dataset!r} exists and does hot have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        is_immediate_target_dataset = i == 0

        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None:
            if dst_snapshots:
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying all destination "
                        "snapshots", step_template.replication_task.id, step_template.src_dataset,
                    )
                    destroy_snapshots(
                        step_template.dst_context.shell,
                        [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                    )
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed"
                    )
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children

                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            allowed_dst_child_datasets = {
                                get_target_dataset(step_template.replication_task, dataset)
                                for dataset in (
                                    set(step_template.src_context.datasets) -
                                    set(step_template.replication_task.exclude)
                                )
                                if dataset != step_template.src_dataset and is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)

                        ensure_has_no_data(step_template.dst_context.shell, step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", step_template.replication_task.id,
                        step_template.src_dataset)
            if not src_snapshots:
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, encryption))

    for step_template, incremental_base, snapshots, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, encryption, observer)
        handle_readonly(step_template)
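
get_target_dataset, used above to work out which destination children are allowed to exist, maps a source dataset path to its path under the target. A simplified sketch for the single source-dataset case (Example #4 shows that source-dataset can also be a list, which this sketch ignores; not the project's actual implementation):

import os


def get_target_dataset(replication_task, dataset: str) -> str:
    # e.g. with source-dataset data/src and target-dataset data/dst:
    #   data/src      -> data/dst
    #   data/src/work -> data/dst/work
    return os.path.normpath(os.path.join(
        replication_task.target_dataset,
        os.path.relpath(dataset, replication_task.source_dataset),
    ))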