Code example #1
File: run.py  Project: hartzell-forks/zettarepl
def run_replication_task_part(replication_task: ReplicationTask,
                              source_dataset: str,
                              src_context: ReplicationContext,
                              dst_context: ReplicationContext, observer):
    target_dataset = get_target_dataset(replication_task, source_dataset)

    check_target_type(replication_task, source_dataset, src_context,
                      dst_context)

    step_templates = calculate_replication_step_templates(
        replication_task, source_dataset, src_context, dst_context)

    destroy_empty_encrypted_target(replication_task, source_dataset,
                                   dst_context)

    with DatasetSizeObserver(
            src_context.shell, dst_context.shell, source_dataset,
            target_dataset, lambda src_used, dst_used: notify(
                observer,
                ReplicationTaskDataProgress(
                    replication_task.id, source_dataset, src_used, dst_used))):
        resumed = resume_replications(step_templates, observer)
        if resumed:
            step_templates = calculate_replication_step_templates(
                replication_task, source_dataset, src_context, dst_context)

        run_replication_steps(step_templates, observer)

    mount_dst_datasets(dst_context, target_dataset, replication_task.recursive)
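The DatasetSizeObserver context manager above keeps the observer informed about source and destination space usage while the replication part runs. A minimal sketch of that shape, assuming a background thread and a poll(dataset) helper returning used bytes (the class name, interval, and helper are placeholders, not the zettarepl implementation):

import threading


class PeriodicSizeObserver:
    """Minimal sketch: poll two datasets' used sizes in the background and report them via a callback."""

    def __init__(self, poll, src_dataset, dst_dataset, callback, interval=10):
        self.poll = poll                    # assumed helper: callable(dataset) -> used bytes
        self.src_dataset = src_dataset
        self.dst_dataset = dst_dataset
        self.callback = callback            # callable(src_used, dst_used), like the lambda above
        self.interval = interval
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._run, daemon=True)

    def __enter__(self):
        self._thread.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._stop.set()
        self._thread.join()

    def _run(self):
        while not self._stop.wait(self.interval):
            self.callback(self.poll(self.src_dataset), self.poll(self.dst_dataset))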
Code example #2
def run_replication_step(step: ReplicationStep, observer=None):
    logger.info("For replication task %r: doing %s from %r to %r of snapshot=%r incremental_base=%r "
                "receive_resume_token=%r", step.replication_task.id, step.replication_task.direction.value,
                step.src_dataset, step.dst_dataset, step.snapshot, step.incremental_base,
                step.receive_resume_token)

    if step.replication_task.direction == ReplicationDirection.PUSH:
        local_context = step.src_context
        remote_context = step.dst_context
    elif step.replication_task.direction == ReplicationDirection.PULL:
        local_context = step.dst_context
        remote_context = step.src_context
    else:
        raise ValueError(f"Invalid replication direction: {step.replication_task.direction!r}")

    transport = remote_context.transport

    process = transport.replication_process(
        step.replication_task.id,
        transport,
        local_context.shell,
        remote_context.shell,
        step.replication_task.direction,
        step.src_dataset,
        step.dst_dataset,
        step.snapshot,
        step.replication_task.properties,
        step.replication_task.replicate,
        step.incremental_base,
        step.receive_resume_token,
        step.replication_task.compression,
        step.replication_task.speed_limit,
        step.replication_task.dedup,
        step.replication_task.large_block,
        step.replication_task.embed,
        step.replication_task.compressed)
    process.add_progress_observer(
        lambda snapshot, current, total:
            notify(observer, ReplicationTaskSnapshotProgress(step.replication_task.id, snapshot.split("@")[0],
                                                             snapshot.split("@")[1], current, total)))
    monitor = ReplicationMonitor(step.dst_context.shell, step.dst_dataset)
    ReplicationProcessRunner(process, monitor).run()

    notify(observer, ReplicationTaskSnapshotSuccess(step.replication_task.id, step.src_dataset, step.snapshot))
Code example #3
    def _spawn_replication_tasks(self, replication_tasks):
        with self.tasks_lock:
            for replication_task in replication_tasks:
                if replication_task in self.pending_tasks:
                    logger.debug("Replication task %r is already pending")
                    continue

                if self._can_spawn_replication_task(replication_task):
                    self._spawn_replication_task(replication_task)
                else:
                    logger.info(
                        "Replication task %r can't execute in parallel with already running tasks, "
                        "delaying it", replication_task)
                    notify(self.observer,
                           ReplicationTaskScheduled(replication_task.id))
                    self.pending_tasks.append(replication_task)

            if not self.pending_tasks and not self.running_tasks and not self.retention_running:
                self._spawn_retention()
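Code example #3 shows the scheduler's spawn-or-defer pattern: under a lock, each task either starts immediately or is appended to a pending queue when it cannot run in parallel with the tasks already running. A stripped-down sketch of that pattern (the can_run predicate and start callable are placeholders for the real _can_spawn_replication_task / _spawn_replication_task logic):

import threading


class TinyScheduler:
    """Sketch of the spawn-or-defer pattern used in _spawn_replication_tasks."""

    def __init__(self, can_run, start):
        self.can_run = can_run    # assumed predicate: callable(task, running) -> bool
        self.start = start        # callable(task) that actually launches the task
        self.lock = threading.Lock()
        self.running = []
        self.pending = []

    def submit(self, tasks):
        with self.lock:
            for task in tasks:
                if task in self.pending:
                    continue                   # already queued, nothing to do
                if self.can_run(task, self.running):
                    self.running.append(task)
                    self.start(task)
                else:
                    self.pending.append(task)  # defer until a running task finishes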
Code example #4
    def _run_periodic_snapshot_tasks(self, now, tasks):
        tasks_with_snapshot_names = sorted(
            [(task, now.strftime(task.naming_schema)) for task in tasks],
            key=lambda task_with_snapshot_name: (
                # Lexicographically smaller snapshot names should go first
                task_with_snapshot_name[1],
                # Recursive snapshot with same name as non-recursive should go first
                0 if task_with_snapshot_name[0].recursive else 1,
                # Recursive snapshots without exclude should go first
                0 if not task_with_snapshot_name[0].exclude else 1,
            ))

        created_snapshots = set()
        for task, snapshot_name in tasks_with_snapshot_names:
            snapshot = Snapshot(task.dataset, snapshot_name)
            if snapshot in created_snapshots:
                continue

            options = notify(self.observer, PeriodicSnapshotTaskStart(task.id))
            try:
                create_snapshot(self.local_shell, snapshot, task.recursive,
                                task.exclude, options.properties)
            except CreateSnapshotError as e:
                logger.warning("Error creating %r: %r", snapshot, e)

                notify(self.observer,
                       PeriodicSnapshotTaskError(task.id, str(e)))
            else:
                logger.info("Created %r", snapshot)
                created_snapshots.add(snapshot)

                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))

        empty_snapshots = get_empty_snapshots_for_deletion(
            self.local_shell, tasks_with_snapshot_names)
        if empty_snapshots:
            logger.info("Destroying empty snapshots: %r", empty_snapshots)
            destroy_snapshots(self.local_shell, empty_snapshots)
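The sort key in the example above orders tasks by snapshot name first, then puts recursive tasks before non-recursive ones with the same name, and recursive tasks without an exclude list before those with one. The effect of that tuple key can be checked in isolation (the task triples below are made up for illustration):

# (snapshot_name, recursive, exclude) triples standing in for tasks
tasks = [
    ("auto-2024-01-01", False, []),
    ("auto-2024-01-01", True, ["tank/skip"]),
    ("auto-2024-01-01", True, []),
]

ordered = sorted(tasks, key=lambda t: (t[0], 0 if t[1] else 1, 0 if not t[2] else 1))
# Recursive without exclude first, then recursive with exclude, then non-recursive
assert ordered == [
    ("auto-2024-01-01", True, []),
    ("auto-2024-01-01", True, ["tank/skip"]),
    ("auto-2024-01-01", False, []),
]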
Code example #5
    def _run_periodic_snapshot_tasks(self, now, tasks):
        scheduled_tasks = []
        for task in tasks:
            snapshot_name = get_snapshot_name(now, task.naming_schema)

            try:
                parsed_snapshot_name = parse_snapshot_name(
                    snapshot_name, task.naming_schema)
            except ValueError as e:
                logger.warning(
                    "Unable to parse snapshot name %r with naming schema %r: %s. Skipping task %r",
                    snapshot_name,
                    task.naming_schema,
                    str(e),
                    task,
                )

                notify(
                    self.observer,
                    PeriodicSnapshotTaskError(
                        task.id, "Unable to parse snapshot name %r: %s" % (
                            snapshot_name,
                            str(e),
                        )))
                continue

            scheduled_tasks.append(
                ScheduledPeriodicSnapshotTask(
                    task,
                    snapshot_name,
                    parsed_snapshot_name,
                ))

        scheduled_tasks = sorted(
            scheduled_tasks,
            key=lambda scheduled_task: (
                # Common sorting order
                parsed_snapshot_sort_key(scheduled_task.parsed_snapshot_name),
                # Recursive snapshot with same name as non-recursive should go first
                0 if scheduled_task.task.recursive else 1,
                # Recursive snapshots without exclude should go first
                0 if not scheduled_task.task.exclude else 1,
            ))

        tasks_with_snapshot_names = [(scheduled_task.task,
                                      scheduled_task.snapshot_name)
                                     for scheduled_task in scheduled_tasks]

        created_snapshots = set()
        for task, snapshot_name in tasks_with_snapshot_names:
            snapshot = Snapshot(task.dataset, snapshot_name)
            if snapshot in created_snapshots:
                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))
                continue

            options = notify(self.observer, PeriodicSnapshotTaskStart(task.id))
            try:
                create_snapshot(self.local_shell, snapshot, task.recursive,
                                task.exclude, options.properties)
            except CreateSnapshotError as e:
                logger.warning("Error creating %r: %r", snapshot, e)

                notify(self.observer,
                       PeriodicSnapshotTaskError(task.id, str(e)))
            else:
                logger.info("Created %r", snapshot)
                created_snapshots.add(snapshot)

                notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))

        empty_snapshots = get_empty_snapshots_for_deletion(
            self.local_shell, tasks_with_snapshot_names)
        if empty_snapshots:
            logger.info("Destroying empty snapshots: %r", empty_snapshots)
            destroy_snapshots(self.local_shell, empty_snapshots)
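In this newer variant, the snapshot name is generated from the naming schema and immediately parsed back; a name that cannot be parsed is reported as a task error and the task is skipped. Assuming the naming schema is an strftime-style format string (as the snippet's get_snapshot_name / parse_snapshot_name helpers suggest), the round trip and the failure path look roughly like this:

from datetime import datetime

naming_schema = "auto-%Y-%m-%d_%H-%M"   # illustrative schema, not taken from the source
now = datetime(2024, 1, 1, 12, 0)

snapshot_name = now.strftime(naming_schema)                      # "auto-2024-01-01_12-00"
assert datetime.strptime(snapshot_name, naming_schema) == now    # round-trips cleanly

# A name that does not match the schema raises ValueError, which the loop above
# turns into a PeriodicSnapshotTaskError notification and a skipped task.
try:
    datetime.strptime("manual-2024-01-01", naming_schema)
except ValueError as e:
    print("skipping task:", e)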
Code example #6
def run_replication_tasks(local_shell: LocalShell, transport: Transport, remote_shell: Shell,
                          replication_tasks: [ReplicationTask], observer=None):
    replication_tasks_parts = calculate_replication_tasks_parts(replication_tasks)

    started_replication_tasks_ids = set()
    failed_replication_tasks_ids = set()
    replication_tasks_parts_left = {
        replication_task.id: len([1
                                  for another_replication_task, source_dataset in replication_tasks_parts
                                  if another_replication_task == replication_task])
        for replication_task in replication_tasks
    }
    for replication_task, source_dataset in replication_tasks_parts:
        if replication_task.id in failed_replication_tasks_ids:
            continue

        local_context = ReplicationContext(None, local_shell)
        remote_context = ReplicationContext(transport, remote_shell)

        if replication_task.direction == ReplicationDirection.PUSH:
            src_context = local_context
            dst_context = remote_context
        elif replication_task.direction == ReplicationDirection.PULL:
            src_context = remote_context
            dst_context = local_context
        else:
            raise ValueError(f"Invalid replication direction: {replication_task.direction!r}")

        if replication_task.id not in started_replication_tasks_ids:
            notify(observer, ReplicationTaskStart(replication_task.id))
            started_replication_tasks_ids.add(replication_task.id)
        recoverable_error = None
        recoverable_sleep = 1
        for i in range(replication_task.retries):
            if recoverable_error is not None:
                logger.info("After recoverable error sleeping for %d seconds", recoverable_sleep)
                time.sleep(recoverable_sleep)
                recoverable_sleep = min(recoverable_sleep * 2, 60)
            else:
                recoverable_sleep = 1

            try:
                try:
                    run_replication_task_part(replication_task, source_dataset, src_context, dst_context, observer)
                except socket.timeout:
                    raise RecoverableReplicationError("Network connection timeout") from None
                except paramiko.ssh_exception.NoValidConnectionsError as e:
                    raise RecoverableReplicationError(str(e).replace("[Errno None] ", "")) from None
                except (IOError, OSError) as e:
                    raise RecoverableReplicationError(str(e)) from None
                replication_tasks_parts_left[replication_task.id] -= 1
                if replication_tasks_parts_left[replication_task.id] == 0:
                    notify(observer, ReplicationTaskSuccess(replication_task.id))
                break
            except RecoverableReplicationError as e:
                logger.warning("For task %r at attempt %d recoverable replication error %r", replication_task.id,
                               i + 1, e)
                recoverable_error = e
            except ReplicationError as e:
                logger.error("For task %r non-recoverable replication error %r", replication_task.id, e)
                notify(observer, ReplicationTaskError(replication_task.id, str(e)))
                failed_replication_tasks_ids.add(replication_task.id)
                break
            except Exception as e:
                logger.error("For task %r unhandled replication error %r", replication_task.id, e, exc_info=True)
                notify(observer, ReplicationTaskError(replication_task.id, str(e)))
                failed_replication_tasks_ids.add(replication_task.id)
                break
        else:
            logger.error("Failed replication task %r after %d retries", replication_task.id,
                         replication_task.retries)
            notify(observer, ReplicationTaskError(replication_task.id, str(recoverable_error)))
            failed_replication_tasks_ids.add(replication_task.id)
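Code example #6 wraps each replication part in a retry loop: recoverable errors trigger a sleep that doubles up to a 60-second cap, non-recoverable errors abort the task immediately, and exhausting the retries falls through to the for loop's else branch. The same backoff skeleton in isolation (RecoverableError and the work callable are placeholders):

import time


class RecoverableError(Exception):
    """Placeholder for RecoverableReplicationError."""


def run_with_retries(work, retries):
    """Sketch of the retry pattern: retry recoverable failures with capped exponential backoff."""
    sleep = 1
    last_error = None
    for attempt in range(retries):
        if last_error is not None:
            time.sleep(sleep)
            sleep = min(sleep * 2, 60)   # double the delay, but never wait more than a minute
        try:
            return work()
        except RecoverableError as e:
            last_error = e               # remember the error and try the next attempt
    raise last_error                     # every attempt failed (assumes retries >= 1)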
Code example #7
File: run.py  Project: nextge-nas/zettarepl
def run_replication_step(step: ReplicationStep, observer=None, observer_snapshot=None):
    logger.info(
        "For replication task %r: doing %s from %r to %r of snapshot=%r incremental_base=%r receive_resume_token=%r "
        "encryption=%r",
        step.replication_task.id, step.replication_task.direction.value, step.src_dataset, step.dst_dataset,
        step.snapshot, step.incremental_base, step.receive_resume_token, step.encryption is not None,
    )

    observer_snapshot = observer_snapshot or step.snapshot

    notify(observer, ReplicationTaskSnapshotStart(
        step.replication_task.id, step.src_dataset, observer_snapshot,
        step.src_context.context.snapshots_sent, step.src_context.context.snapshots_total,
    ))

    # Unmount the target dataset because we will be overwriting its contents, and its children's
    # mountpoints would become dangling. ZFS will mount the entire directory structure again after receiving.
    try:
        step.dst_context.shell.exec(["zfs", "umount", step.dst_dataset])
    except ExecException:
        pass

    if step.replication_task.direction == ReplicationDirection.PUSH:
        local_context = step.src_context
        remote_context = step.dst_context
    elif step.replication_task.direction == ReplicationDirection.PULL:
        local_context = step.dst_context
        remote_context = step.src_context
    else:
        raise ValueError(f"Invalid replication direction: {step.replication_task.direction!r}")

    transport = remote_context.transport

    process = transport.replication_process(
        step.replication_task.id,
        transport,
        local_context.shell,
        remote_context.shell,
        step.replication_task.direction,
        step.src_dataset,
        step.dst_dataset,
        step.snapshot,
        step.replication_task.properties,
        step.replication_task.properties_exclude,
        step.replication_task.properties_override,
        step.replication_task.replicate,
        step.encryption,
        step.incremental_base,
        step.receive_resume_token,
        step.replication_task.compression,
        step.replication_task.speed_limit,
        step.replication_task.dedup,
        step.replication_task.large_block,
        step.replication_task.embed,
        step.replication_task.compressed,
        step.replication_task.properties and step.src_context.datasets_encrypted[step.src_dataset],
    )
    process.add_progress_observer(
        lambda bytes_sent, bytes_total:
            notify(observer, ReplicationTaskSnapshotProgress(
                step.replication_task.id, step.src_dataset, observer_snapshot,
                step.src_context.context.snapshots_sent, step.src_context.context.snapshots_total,
                bytes_sent, bytes_total,
            ))
    )
    monitor = ReplicationMonitor(step.dst_context.shell, step.dst_dataset)
    ReplicationProcessRunner(process, monitor).run()

    step.template.src_context.context.snapshots_sent_by_replication_step_template[step.template] += 1
    notify(observer, ReplicationTaskSnapshotSuccess(
        step.replication_task.id, step.src_dataset, observer_snapshot,
        step.src_context.context.snapshots_sent, step.src_context.context.snapshots_total,
    ))

    if step.incremental_base is None:
        # Might have created dataset, need to set it to readonly
        handle_readonly(step.template)
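The progress plumbing in this variant reports byte-level progress: the transport invokes the registered callback with bytes_sent and bytes_total, and the callback forwards a ReplicationTaskSnapshotProgress message to the observer. Reduced to its essentials, the add_progress_observer / notify wiring is just callback fan-out (the class and names below are illustrative, not the zettarepl API):

class ProgressReporter:
    """Minimal sketch of the add_progress_observer / callback fan-out pattern."""

    def __init__(self):
        self.observers = []

    def add_progress_observer(self, observer):
        self.observers.append(observer)

    def notify_progress(self, bytes_sent, bytes_total):
        for observer in self.observers:
            observer(bytes_sent, bytes_total)


reporter = ProgressReporter()
reporter.add_progress_observer(lambda sent, total: print(f"{sent}/{total} bytes sent"))
reporter.notify_progress(512, 4096)   # prints "512/4096 bytes sent"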
Code example #8
def run_replication_tasks(local_shell: LocalShell,
                          transport: Transport,
                          remote_shell: Shell,
                          replication_tasks: [ReplicationTask],
                          observer=None):
    contexts = defaultdict(GlobalReplicationContext)

    replication_tasks_parts = calculate_replication_tasks_parts(
        replication_tasks)

    started_replication_tasks_ids = set()
    failed_replication_tasks_ids = set()
    replication_tasks_parts_left = {
        replication_task.id: len([
            1 for another_replication_task, source_dataset in
            replication_tasks_parts
            if another_replication_task == replication_task
        ])
        for replication_task in replication_tasks
    }
    for replication_task, source_dataset in replication_tasks_parts:
        if replication_task.id in failed_replication_tasks_ids:
            continue

        local_context = ReplicationContext(contexts[replication_task], None,
                                           local_shell)
        remote_context = ReplicationContext(contexts[replication_task],
                                            transport, remote_shell)

        if replication_task.direction == ReplicationDirection.PUSH:
            src_context = local_context
            dst_context = remote_context
        elif replication_task.direction == ReplicationDirection.PULL:
            src_context = remote_context
            dst_context = local_context
        else:
            raise ValueError(
                f"Invalid replication direction: {replication_task.direction!r}"
            )

        if replication_task.id not in started_replication_tasks_ids:
            notify(observer, ReplicationTaskStart(replication_task.id))
            started_replication_tasks_ids.add(replication_task.id)
        recoverable_error = None
        recoverable_sleep = 1
        for i in range(replication_task.retries):
            if recoverable_error is not None:
                logger.info("After recoverable error sleeping for %d seconds",
                            recoverable_sleep)
                time.sleep(recoverable_sleep)
                recoverable_sleep = min(recoverable_sleep * 2, 60)
            else:
                recoverable_sleep = 1

            try:
                try:
                    run_replication_task_part(replication_task, source_dataset,
                                              src_context, dst_context,
                                              observer)
                except socket.timeout:
                    raise RecoverableReplicationError(
                        "Network connection timeout") from None
                except paramiko.ssh_exception.NoValidConnectionsError as e:
                    raise RecoverableReplicationError(
                        str(e).replace("[Errno None] ", "")) from None
                except paramiko.ssh_exception.SSHException as e:
                    if isinstance(
                            e, (paramiko.ssh_exception.AuthenticationException,
                                paramiko.ssh_exception.BadHostKeyException,
                                paramiko.ssh_exception.ProxyCommandFailure,
                                paramiko.ssh_exception.ConfigParseError)):
                        raise ReplicationError(
                            str(e).replace("[Errno None] ", "")) from None
                    else:
                        # It might be an SSH error that leaves paramiko connection in an invalid state
                        # Let's reset remote shell just in case
                        remote_shell.close()
                        raise RecoverableReplicationError(
                            str(e).replace("[Errno None] ", "")) from None
                except ExecException as e:
                    if e.returncode == 128 + signal.SIGPIPE:
                        for warning in warnings_from_zfs_success(e.stdout):
                            contexts[replication_task].add_warning(warning)
                        raise RecoverableReplicationError(
                            broken_pipe_error(e.stdout))
                    else:
                        raise
                except (IOError, OSError) as e:
                    raise RecoverableReplicationError(str(e)) from None
                replication_tasks_parts_left[replication_task.id] -= 1
                if replication_tasks_parts_left[replication_task.id] == 0:
                    notify(
                        observer,
                        ReplicationTaskSuccess(
                            replication_task.id,
                            contexts[replication_task].warnings))
                break
            except RecoverableReplicationError as e:
                logger.warning(
                    "For task %r at attempt %d recoverable replication error %r",
                    replication_task.id, i + 1, e)
                recoverable_error = e
            except ReplicationError as e:
                logger.error(
                    "For task %r non-recoverable replication error %r",
                    replication_task.id, e)
                notify(observer,
                       ReplicationTaskError(replication_task.id, str(e)))
                failed_replication_tasks_ids.add(replication_task.id)
                break
            except Exception as e:
                logger.error("For task %r unhandled replication error %r",
                             replication_task.id,
                             e,
                             exc_info=True)
                notify(observer,
                       ReplicationTaskError(replication_task.id, str(e)))
                failed_replication_tasks_ids.add(replication_task.id)
                break
        else:
            logger.error("Failed replication task %r after %d retries",
                         replication_task.id, replication_task.retries)
            notify(
                observer,
                ReplicationTaskError(replication_task.id,
                                     str(recoverable_error)))
            failed_replication_tasks_ids.add(replication_task.id)
Code example #9
File: run.py  Project: NHGmaniac/zettarepl
def run_replication_step(step: ReplicationStep,
                         observer=None,
                         observer_snapshot=None):
    logger.info(
        "For replication task %r: doing %s from %r to %r of snapshot=%r incremental_base=%r "
        "receive_resume_token=%r", step.replication_task.id,
        step.replication_task.direction.value, step.src_dataset,
        step.dst_dataset, step.snapshot, step.incremental_base,
        step.receive_resume_token)

    observer_snapshot = observer_snapshot or step.snapshot

    notify(
        observer,
        ReplicationTaskSnapshotStart(
            step.replication_task.id,
            step.src_dataset,
            observer_snapshot,
            step.src_context.context.snapshots_sent,
            step.src_context.context.snapshots_total,
        ))

    if step.replication_task.direction == ReplicationDirection.PUSH:
        local_context = step.src_context
        remote_context = step.dst_context
    elif step.replication_task.direction == ReplicationDirection.PULL:
        local_context = step.dst_context
        remote_context = step.src_context
    else:
        raise ValueError(
            f"Invalid replication direction: {step.replication_task.direction!r}"
        )

    transport = remote_context.transport

    process = transport.replication_process(
        step.replication_task.id, transport, local_context.shell,
        remote_context.shell, step.replication_task.direction,
        step.src_dataset, step.dst_dataset, step.snapshot,
        step.replication_task.properties, step.replication_task.replicate,
        step.incremental_base, step.receive_resume_token,
        step.replication_task.compression, step.replication_task.speed_limit,
        step.replication_task.dedup, step.replication_task.large_block,
        step.replication_task.embed, step.replication_task.compressed)
    process.add_progress_observer(lambda bytes_sent, bytes_total: notify(
        observer,
        ReplicationTaskSnapshotProgress(
            step.replication_task.id,
            step.src_dataset,
            observer_snapshot,
            step.src_context.context.snapshots_sent,
            step.src_context.context.snapshots_total,
            bytes_sent,
            bytes_total,
        )))
    monitor = ReplicationMonitor(step.dst_context.shell, step.dst_dataset)
    ReplicationProcessRunner(process, monitor).run()

    step.template.src_context.context.snapshots_sent_by_replication_step_template[
        step.template] += 1
    notify(
        observer,
        ReplicationTaskSnapshotSuccess(
            step.replication_task.id,
            step.src_dataset,
            observer_snapshot,
            step.src_context.context.snapshots_sent,
            step.src_context.context.snapshots_total,
        ))

    if step.incremental_base is None:
        # Might have created dataset, need to set it to readonly
        handle_readonly(step.template)