Example #1
def destroy_snapshots(shell: Shell, snapshots: [Snapshot]):
    for dataset, snapshots in sortedgroupby(snapshots, lambda snapshot: snapshot.dataset):
        names = {snapshot.name for snapshot in snapshots}

        logger.info("On %r for dataset %r destroying snapshots %r", shell, dataset, names)

        while names:
            # Batch names so the "dataset@name1,name2,..." argument stays under ARG_MAX
            chunk = set()
            sum_len = len(dataset)
            for name in sorted(names):
                new_sum_len = sum_len + len(name) + 1
                if new_sum_len >= ARG_MAX:
                    break

                chunk.add(name)
                sum_len = new_sum_len

            args = ["zfs", "destroy", f"{dataset}@" + ",".join(sorted(chunk))]
            try:
                shell.exec(args)
                names -= chunk
            except ExecException as e:
                if m := re.search(r"cannot destroy snapshot .+?@(.+?): dataset is busy", e.stdout):
                    reason = "busy"
                    name = m.group(1)
                elif m := re.search(r"cannot destroy '.+?@(.+?)': snapshot has dependent clones", e.stdout):
                    reason = "cloned"
                    name = m.group(1)
                else:
                    raise

                logger.info("Snapshot %r on dataset %r is %s, skipping", name, dataset, reason)
                names.discard(name)
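
A note on this example: `sortedgroupby` is not defined in the snippet. A minimal sketch consistent with how it is called here (one group of snapshots per dataset) could look like the following; only the name and call signature come from the code above, the implementation itself is an assumption.

from itertools import groupby

def sortedgroupby(items, key):
    # Assumed helper: sort first so groupby yields exactly one group per key
    return [(k, list(group)) for k, group in groupby(sorted(items, key=key), key=key)]

The batching loop exists because the comma-joined snapshot list is passed as a single argument, which must stay under the system's ARG_MAX limit.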
Example #2
def destroy_snapshots(shell: Shell, snapshots: [Snapshot]):
    for dataset, snapshots in sortedgroupby(snapshots,
                                            lambda snapshot: snapshot.dataset):
        names = [snapshot.name for snapshot in snapshots]

        logger.info("On %r for dataset %r destroying snapshots %r", shell,
                    dataset, names)
        args = ["zfs", "destroy", f"{dataset}@" + "%".join(names)]

        shell.exec(args)
Example #3
def list_datasets(shell: Shell, dataset: str = None, recursive: bool = True):
    args = [
        "zfs", "list", "-t", "filesystem", "-H", "-o", "name", "-s", "name"
    ]
    if recursive:
        args.extend(["-r"])
    else:
        args.extend(["-d", "1"])
    if dataset is not None:
        args.append(dataset)
    return list(filter(None, shell.exec(args).split("\n")))
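
The `Shell` and `ExecException` types are not defined in these snippets. From the call sites (`shell.exec(args)` returns stdout as text; a failure raises an exception carrying `stdout`), a minimal local stand-in can be sketched as follows. Treat this as an assumption for experimentation, not the library's actual API.

import subprocess

class ExecException(Exception):
    def __init__(self, returncode, stdout):
        super().__init__(stdout)
        self.returncode = returncode
        self.stdout = stdout

class LocalShell:
    def exec(self, args):
        # Run the command and return its stdout as text; raise on failure,
        # folding stderr into `stdout` since the examples match error text there
        result = subprocess.run(args, capture_output=True, text=True)
        if result.returncode != 0:
            raise ExecException(result.returncode, result.stdout + result.stderr)
        return result.stdout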
Example #4
def ensure_has_no_data(shell: Shell, dataset: str):
    try:
        dst_properties = get_properties(
            shell, dataset, {
                "type": str,
                "mounted": bool,
                "mountpoint": str,
                "referenced": int,
                "snapdir": str,
                "used": int,
            })
    except ExecException as e:
        if not ("dataset does not exist" in e.stdout):
            raise
    else:
        if (dst_properties["type"] == "filesystem"
                and dst_properties["mounted"]
                and dst_properties["mountpoint"] != "legacy"):
            try:
                index = shell.ls(dst_properties["mountpoint"])
            except Exception as e:
                logger.warning(
                    "An exception occurred while listing dataset %r mountpoint %r: %r. Assuming dataset is not mounted",
                    dataset,
                    dst_properties["mountpoint"],
                    e,
                )
            else:
                if dst_properties["snapdir"] == "visible" and ".zfs" in index:
                    index.remove(".zfs")

                if index:
                    raise ReplicationError(
                        f"Target dataset {dataset!r} does not have snapshots but has data (e.g. {index[0]!r} and "
                        f"replication from scratch is not allowed. Refusing to overwrite existing data."
                    )

                return

        if dst_properties["type"] == "filesystem":
            used_property = "used"
        elif dst_properties["type"] == "volume":
            used_property = "referenced"
        else:
            raise ReplicationError(
                f"Target dataset {dataset!r} has invalid type {dst_properties['type']!r}"
            )

        # Empty datasets on large pool configurations can have really big size
        if dst_properties[used_property] > 1024 * 1024 * 10:
            raise ReplicationError(
                f"Target dataset {dataset!r} does not have snapshots but has data ({dst_properties[used_property]} "
                f"bytes used) and replication from scratch is not allowed. Refusing to overwrite existing data."
            )
Example #5
def list_snapshots(shell: Shell, dataset: str, recursive: bool) -> [Snapshot]:
    args = ["zfs", "list", "-t", "snapshot", "-H", "-o", "name", "-s", "name"]
    if recursive:
        args.extend(["-r"])
    else:
        args.extend(["-d", "1"])
    args.append(dataset)
    return [
        Snapshot(*line.split("@"))
        for line in shell.exec(args).split("\n")
        if line
    ]
Example #6
def destroy_snapshots(shell: Shell, snapshots: [Snapshot]):
    for dataset, snapshots in sortedgroupby(snapshots,
                                            lambda snapshot: snapshot.dataset):
        names = [snapshot.name for snapshot in snapshots]

        logger.info("On %r for dataset %r destroying snapshots %r", shell,
                    dataset, names)

        while names:
            args = ["zfs", "destroy", f"{dataset}@" + ",".join(names)]
            try:
                shell.exec(args)
                break
            except ExecException as e:
                m = re.search(
                    r"cannot destroy snapshot .+?@(.+?): dataset is busy",
                    e.stdout)
                if m is None:
                    raise

                name = m.group(1)
                logger.info("Snapshot %r on dataset %r is busy, skipping",
                            name, dataset)
                names.remove(name)
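
A hypothetical usage sketch (pool and snapshot names are made up). Grouping by dataset means one `zfs destroy` per dataset, with the snapshot names comma-joined:

destroy_snapshots(shell, [
    Snapshot("tank/data", "auto-2020-01-01"),
    Snapshot("tank/data", "auto-2020-01-02"),
    Snapshot("tank/home", "auto-2020-01-01"),
])
# Runs, assuming no snapshot is busy:
#   zfs destroy tank/data@auto-2020-01-01,auto-2020-01-02
#   zfs destroy tank/home@auto-2020-01-01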
Example #7
def list_datasets_with_properties(shell: Shell, dataset: str = None, recursive: bool = True, properties=None):
    properties = properties or []

    properties = ["name"] + properties

    args = ["zfs", "list", "-t", "filesystem,volume", "-H", "-o", ",".join(properties), "-s", "name"]
    if recursive:
        args.extend(["-r"])
    else:
        args.extend(["-d", "1"])
    if dataset is not None:
        args.append(dataset)

    with ZfsCliExceptionHandler():
        output = shell.exec(args)

    return [dict(zip(properties, line.split("\t"))) for line in filter(None, output.split("\n"))]
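
For illustration, a hypothetical call listing the immediate children of `tank` with two extra columns. In this untyped variant the values come back as the raw strings `zfs list` prints:

rows = list_datasets_with_properties(shell, "tank", recursive=False,
                                     properties=["used", "avail"])
# e.g. [{"name": "tank", "used": "1.21T", "avail": "302G"},
#       {"name": "tank/data", "used": "800G", "avail": "302G"}]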
Example #8
def list_datasets_with_properties(shell: Shell, dataset: str = None, recursive: bool = True, properties=None):
    properties = dict(properties or {})  # copy so the caller's dict is not mutated

    properties["name"] = str

    args = ["zfs", "list", "-t", "filesystem,volume", "-H", "-o", ",".join(properties.keys()), "-s", "name"]
    if recursive:
        args.extend(["-r"])
    else:
        args.extend(["-d", "1"])
    if dataset is not None:
        args.append(dataset)

    with ZfsCliExceptionHandler():
        output = shell.exec(args)

    return [
        {
            property: parse_property(value, properties[property])
            for property, value in zip(properties, line.split("\t"))
        }
        for line in filter(None, output.split("\n"))
    ]
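
`parse_property` is not shown. A minimal sketch consistent with the `{property: type}` specs used in these examples (`mounted` is reported as `yes`/`no`) could be:

def parse_property(value, type_):
    if type_ is bool:
        # zfs prints boolean-ish properties as yes/no (or on/off)
        return value in ("yes", "on")
    if value == "-":
        # "-" means the property has no value for this dataset
        return None
    return type_(value)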
Example #9
def inspect_data(shell: Shell, dataset: str, exclude: [str] = None):
    exclude = exclude or []

    try:
        dst_properties = get_properties(shell, dataset, {
            "type": str,
            "mounted": bool,
            "mountpoint": str,
            "referenced": int,
            "snapdir": str,
            "used": int,
        })
    except ExecException as e:
        if "dataset does not exist" in e.stdout:
            return None, None

        raise
    else:
        if (
                dst_properties["type"] == "filesystem" and
                dst_properties["mounted"] and
                dst_properties["mountpoint"] != "legacy"
        ):
            try:
                index = shell.ls(dst_properties["mountpoint"])
            except Exception as e:
                logger.warning(
                    "An exception occurred while listing dataset %r mountpoint %r on shell %r: %r. "
                    "Assuming dataset is not mounted",
                    dataset, dst_properties["mountpoint"], shell, e,
                )
            else:
                if dst_properties["snapdir"] == "visible" and ".zfs" in index:
                    index.remove(".zfs")

                for excluded in exclude:
                    if excluded not in index:
                        continue

                    child_mountpoint = os.path.join(dst_properties["mountpoint"], excluded)
                    try:
                        if not shell.is_dir(child_mountpoint):
                            continue
                    except Exception as e:
                        logger.warning(
                            "An exception occurred while checking if %r on shell %r is a directory: %r. "
                            "Assuming it is not",
                            child_mountpoint, shell, e,
                        )
                        continue

                    child_dataset = os.path.join(dataset, excluded)
                    try:
                        child_properties = get_properties(shell, child_dataset, {
                            "type": str,
                            "mounted": bool,
                            "mountpoint": str,
                        })
                    except Exception as e:
                        logger.warning(
                            "An exception occurred while getting properties for dataset %r on shell %r: %r. "
                            "Assuming it does not exist",
                            child_dataset, shell, e,
                        )
                        continue

                    if child_properties["type"] == "filesystem":
                        if child_properties["mounted"] and child_properties["mountpoint"] == child_mountpoint:
                            index.remove(excluded)
                        else:
                            try:
                                child_contents = shell.ls(child_mountpoint)
                            except Exception as e:
                                logger.warning(
                                    "An exception occurred while listing %r on shell %r: %r. Assuming it is not empty",
                                    child_mountpoint, shell, e,
                                )
                                continue
                            else:
                                if not child_contents:
                                    index.remove(excluded)

                return index, dst_properties

        return None, dst_properties
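
The return contract, as inferred from the branches above: `(None, None)` when the dataset does not exist, `(None, properties)` when its contents could not be inspected, and `(index, properties)` where `index` lists the leftover entries no excluded child dataset accounts for. A hypothetical caller:

index, props = inspect_data(shell, "tank/backup", exclude=["scratch"])
if index:
    raise ReplicationError(f"Target 'tank/backup' already contains data: {index[:3]!r}")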
Example #10
def run_replication_tasks(local_shell: LocalShell, transport: Transport, remote_shell: Shell,
                          replication_tasks: [ReplicationTask], observer=None):
    contexts = defaultdict(GlobalReplicationContext)

    replication_tasks_parts = calculate_replication_tasks_parts(replication_tasks)

    started_replication_tasks_ids = set()
    failed_replication_tasks_ids = set()
    replication_tasks_parts_left = {
        replication_task.id: sum(
            1
            for another_replication_task, source_dataset in replication_tasks_parts
            if another_replication_task == replication_task
        )
        for replication_task in replication_tasks
    }
    for replication_task, source_dataset in replication_tasks_parts:
        if replication_task.id in failed_replication_tasks_ids:
            continue

        local_context = ReplicationContext(contexts[replication_task], None, local_shell)
        remote_context = ReplicationContext(contexts[replication_task], transport, remote_shell)

        if replication_task.direction == ReplicationDirection.PUSH:
            src_context = local_context
            dst_context = remote_context
        elif replication_task.direction == ReplicationDirection.PULL:
            src_context = remote_context
            dst_context = local_context
        else:
            raise ValueError(f"Invalid replication direction: {replication_task.direction!r}")

        if replication_task.id not in started_replication_tasks_ids:
            notify(observer, ReplicationTaskStart(replication_task.id))
            started_replication_tasks_ids.add(replication_task.id)
        recoverable_error = None
        recoverable_sleep = 1
        for i in range(replication_task.retries):
            if recoverable_error is not None:
                logger.info("After recoverable error sleeping for %d seconds", recoverable_sleep)
                time.sleep(recoverable_sleep)
                recoverable_sleep = min(recoverable_sleep * 2, 60)
            else:
                recoverable_sleep = 1

            try:
                try:
                    run_replication_task_part(replication_task, source_dataset, src_context, dst_context, observer)
                except socket.timeout:
                    raise RecoverableReplicationError("Network connection timeout") from None
                except paramiko.ssh_exception.NoValidConnectionsError as e:
                    raise RecoverableReplicationError(str(e).replace("[Errno None] ", "")) from None
                except paramiko.ssh_exception.SSHException as e:
                    if isinstance(e, (paramiko.ssh_exception.AuthenticationException,
                                      paramiko.ssh_exception.BadHostKeyException,
                                      paramiko.ssh_exception.ProxyCommandFailure,
                                      paramiko.ssh_exception.ConfigParseError)):
                        raise ReplicationError(str(e).replace("[Errno None] ", "")) from None
                    else:
                        # It might be an SSH error that leaves paramiko connection in an invalid state
                        # Let's reset remote shell just in case
                        remote_shell.close()
                        raise RecoverableReplicationError(str(e).replace("[Errno None] ", "")) from None
                except (IOError, OSError) as e:
                    raise RecoverableReplicationError(str(e)) from None
                replication_tasks_parts_left[replication_task.id] -= 1
                if replication_tasks_parts_left[replication_task.id] == 0:
                    notify(observer, ReplicationTaskSuccess(replication_task.id))
                break
            except RecoverableReplicationError as e:
                logger.warning("For task %r at attempt %d recoverable replication error %r", replication_task.id,
                               i + 1, e)
                recoverable_error = e
            except ReplicationError as e:
                logger.error("For task %r non-recoverable replication error %r", replication_task.id, e)
                notify(observer, ReplicationTaskError(replication_task.id, str(e)))
                failed_replication_tasks_ids.add(replication_task.id)
                break
            except Exception as e:
                logger.error("For task %r unhandled replication error %r", replication_task.id, e, exc_info=True)
                notify(observer, ReplicationTaskError(replication_task.id, str(e)))
                failed_replication_tasks_ids.add(replication_task.id)
                break
        else:
            logger.error("Failed replication task %r after %d retries", replication_task.id,
                         replication_task.retries)
            notify(observer, ReplicationTaskError(replication_task.id, str(recoverable_error)))
            failed_replication_tasks_ids.add(replication_task.id)
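
One construct worth calling out: the `else` on the retry loop is Python's `for`/`else`. The `else` branch runs only when the loop completes without `break`, i.e. when every attempt failed with a recoverable error. A stripped-down sketch of the same pattern (`attempt_once`, `give_up` and `retries` are placeholders):

for attempt in range(retries):
    try:
        attempt_once()
        break        # success: the else clause is skipped
    except RecoverableReplicationError:
        continue     # recoverable: try again
else:
    give_up()        # reached only if no attempt ever hit `break`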
Example #11
def create_dataset(shell: Shell, dataset: str):
    shell.exec(["zfs", "create", dataset])
Example #12
def is_empty_snapshot(shell: Shell, snapshot: Snapshot):
    return shell.exec(["zfs", "get", "-H", "-o", "value", "written", str(snapshot)]).strip() == "0"