def run_replication_step(step: ReplicationStep, observer=None, observer_snapshot=None):
    """Execute a single replication step: send one snapshot (or resume token) from
    ``step.src_dataset`` to ``step.dst_dataset``.

    :param step: fully-resolved step (task, datasets, snapshot, incremental base,
        resume token, encryption) to execute.
    :param observer: optional callback fed ReplicationTaskSnapshot* progress events
        via ``notify``; may be ``None``.
    :param observer_snapshot: snapshot name to report to the observer; defaults to
        ``step.snapshot`` (callers may pass a different display name).

    Raises ``ValueError`` for an unknown replication direction; replication process
    failures propagate from ``ReplicationProcessRunner.run()``.
    """
    logger.info(
        "For replication task %r: doing %s from %r to %r of snapshot=%r incremental_base=%r receive_resume_token=%r "
        "encryption=%r",
        step.replication_task.id, step.replication_task.direction.value, step.src_dataset, step.dst_dataset,
        step.snapshot, step.incremental_base, step.receive_resume_token, step.encryption is not None,
    )

    # Fall back to the step's own snapshot name when the caller did not supply one.
    observer_snapshot = observer_snapshot or step.snapshot

    notify(observer, ReplicationTaskSnapshotStart(
        step.replication_task.id, step.src_dataset, observer_snapshot,
        step.src_context.context.snapshots_sent, step.src_context.context.snapshots_total,
    ))

    # Umount target dataset because we will be overwriting its contents and children mountpoints
    # will become dangling. ZFS will mount entire directory structure again after receiving.
    try:
        step.dst_context.shell.exec(["zfs", "umount", step.dst_dataset])
    except ExecException:
        # Best-effort: the dataset may not exist yet or may not be mounted.
        pass

    # PUSH runs locally against the source; PULL runs locally against the destination.
    if step.replication_task.direction == ReplicationDirection.PUSH:
        local_context = step.src_context
        remote_context = step.dst_context
    elif step.replication_task.direction == ReplicationDirection.PULL:
        local_context = step.dst_context
        remote_context = step.src_context
    else:
        raise ValueError(f"Invalid replication direction: {step.replication_task.direction!r}")

    # Decide whether to use a raw (`zfs send -w`) stream: for full-filesystem
    # replication, any encrypted source dataset forces raw mode; otherwise only
    # when properties are sent and this specific source dataset is encrypted.
    if step.replication_task.replicate:
        raw = any(step.src_context.datasets_encrypted.values())
    else:
        raw = step.replication_task.properties and step.src_context.datasets_encrypted[step.src_dataset]

    transport = remote_context.transport
    # NOTE(review): argument order here must match the transport's
    # `replication_process` signature exactly — all positional.
    process = transport.replication_process(
        step.replication_task.id,
        transport,
        local_context.shell,
        remote_context.shell,
        step.replication_task.direction,
        step.src_dataset,
        step.dst_dataset,
        step.snapshot,
        step.replication_task.properties,
        # Only excludes/overrides that name properties valid for this step.
        list(set(step.replication_task.properties_exclude) & step.valid_properties),
        {k: v for k, v in step.replication_task.properties_override.items() if k in step.valid_properties},
        step.replication_task.replicate,
        step.encryption,
        step.incremental_base,
        step.receive_resume_token,
        step.replication_task.compression,
        step.replication_task.speed_limit,
        step.replication_task.dedup,
        step.replication_task.large_block,
        step.replication_task.embed,
        step.replication_task.compressed,
        raw,
    )
    # Forward byte-level progress from the replication process to the observer.
    process.add_progress_observer(
        lambda bytes_sent, bytes_total: notify(observer, ReplicationTaskSnapshotProgress(
            step.replication_task.id, step.src_dataset, observer_snapshot,
            step.src_context.context.snapshots_sent, step.src_context.context.snapshots_total,
            bytes_sent, bytes_total,
        ))
    )
    process.add_warning_observer(step.template.src_context.context.add_warning)
    monitor = ReplicationMonitor(step.dst_context.shell, step.dst_dataset)
    ReplicationProcessRunner(process, monitor).run()

    # Only reached on success: bump per-template sent counter and notify.
    step.template.src_context.context.snapshots_sent_by_replication_step_template[step.template] += 1
    notify(observer, ReplicationTaskSnapshotSuccess(
        step.replication_task.id, step.src_dataset, observer_snapshot,
        step.src_context.context.snapshots_sent, step.src_context.context.snapshots_total,
    ))

    if step.incremental_base is None:
        # Might have created dataset, need to set it to readonly
        handle_readonly(step.template)
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Plan and execute replication for a list of step templates.

    First validates the readonly requirement on the target, then builds a plan
    (one entry per dataset that has snapshots to send, with its incremental base
    and encryption decision), and finally executes the plan via
    ``replicate_snapshots``.

    :param step_templates: templates ordered so that the immediate target dataset
        comes first and child datasets follow.
    :param observer: optional progress-event callback passed through to
        ``replicate_snapshots``; may be ``None``.

    Raises ``ReplicationError`` when the target violates the readonly requirement
    and ``NoIncrementalBaseReplicationError`` when no incremental base exists and
    replication from scratch is not allowed.

    NOTE(review): this file appears to also contain a later definition of the
    same name (with ``include_intermediate`` support) that shadows this one —
    confirm which version is intended to survive.
    """
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            # Unknown datasets default to True so we only fail on a dataset we
            # positively know is writable.
            if not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True):
                # Fixed typo in user-facing message: "does hot have" -> "does not have".
                raise ReplicationError(
                    f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        # Only the first template is the immediate (root) target dataset;
        # the rest are its children.
        is_immediate_target_dataset = i == 0

        # Skip datasets whose ancestor was already found to have no snapshots.
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                           step_template.replication_task)
        if incremental_base is None:
            if dst_snapshots:
                # Destination has snapshots but none matches the source: either
                # wipe them (from-scratch allowed) or refuse.
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying all destination "
                        "snapshots", step_template.replication_task.id, step_template.src_dataset,
                    )
                    destroy_snapshots(
                        step_template.dst_context.shell,
                        [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots],
                    )
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed"
                    )
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children
                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            # Destination children that correspond to source children
                            # we are about to replicate are allowed to exist empty.
                            allowed_dst_child_datasets = {
                                get_target_dataset(step_template.replication_task, dataset)
                                for dataset in (set(step_template.src_context.datasets) -
                                                set(step_template.replication_task.exclude))
                                if dataset != step_template.src_dataset and
                                is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and
                                is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)
                        ensure_has_no_data(step_template.dst_context.shell, step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r",
                        step_template.replication_task.id, step_template.src_dataset)
            if not src_snapshots:
                # No source snapshots at all: children of this dataset cannot have
                # a base either, so ignore the whole subtree.
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        # Only apply task encryption when we are creating the target dataset.
        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, encryption))

    # Execute the plan only after every dataset was validated and counted.
    for step_template, incremental_base, snapshots, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, encryption, observer)
        handle_readonly(step_template)
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Plan and execute replication for a list of step templates.

    Validates the readonly requirement on the target, builds a plan (one entry
    per dataset with snapshots to send, including its incremental base,
    intermediate-snapshot flag and encryption decision), then executes it via
    ``replicate_snapshots``.

    :param step_templates: templates ordered so that the immediate target dataset
        comes first and child datasets follow.
    :param observer: optional progress-event callback passed through to
        ``replicate_snapshots``; may be ``None``.

    Raises ``ReplicationError`` on readonly violation or when the immediate
    target has no matching snapshots, and ``NoIncrementalBaseReplicationError``
    when from-scratch replication is needed but not allowed.
    """
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            # Unknown datasets default to True so we only fail on a dataset we
            # positively know is writable.
            if not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True):
                message = (
                    f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )
                # Best-effort: enrich the error with a remediation hint for
                # volumes; if querying the property fails we still raise the
                # base message below.
                try:
                    target_type = get_property(step_template.dst_context.shell, step_template.dst_dataset, "type")
                except Exception:
                    pass
                else:
                    if target_type == "volume":
                        message += (
                            f" Please run \"zfs set readonly=on {step_template.dst_dataset}\" on the target system "
                            "to fix this."
                        )

                raise ReplicationError(message)

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        # Only the first template is the immediate (root) target dataset;
        # the rest are its children.
        is_immediate_target_dataset = i == 0

        # Skip datasets whose ancestor was already found to have no snapshots.
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug(
                    "Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                    step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots, include_intermediate = get_snapshots_to_send(
            src_snapshots, dst_snapshots, step_template.replication_task, step_template.src_context.shell,
            step_template.src_dataset,
        )
        if incremental_base is None and snapshots:
            if dst_snapshots:
                # Destination exists with snapshots but none matches the source:
                # either destroy the destination (from-scratch allowed) or refuse.
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying destination dataset",
                        step_template.replication_task.id, step_template.src_dataset,
                    )
                    step_template.dst_context.shell.exec(["zfs", "destroy", "-r", step_template.dst_dataset])
                    # Invalidate cached destination state for the destroyed
                    # dataset and all of its children.
                    for dictionary in (
                        step_template.dst_context.datasets,
                        step_template.dst_context.datasets_encrypted,
                        step_template.dst_context.datasets_readonly,
                        step_template.dst_context.datasets_receive_resume_tokens,
                    ):
                        if dictionary is None:
                            continue
                        for k in list(dictionary.keys()):
                            if k == step_template.dst_dataset or k.startswith(f"{step_template.dst_dataset}/"):
                                dictionary.pop(k)
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed")
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children
                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            # Destination children that correspond to source children
                            # we are about to replicate are allowed to exist empty.
                            allowed_dst_child_datasets = {
                                get_target_dataset(step_template.replication_task, dataset)
                                for dataset in (set(step_template.src_context.datasets) -
                                                set(step_template.replication_task.exclude))
                                if dataset != step_template.src_dataset and
                                is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and
                                is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)
                        ensure_has_no_data(step_template.dst_context.shell, step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r",
                        step_template.replication_task.id, step_template.src_dataset)
            # The immediate target with neither snapshots nor a base is a hard
            # error; for children we silently skip.
            if is_immediate_target_dataset and incremental_base is None:
                raise ReplicationError(
                    f"Dataset {step_template.src_dataset!r} does not have any matching snapshots to replicate")
            if not src_snapshots:
                # No source snapshots at all: children of this dataset cannot
                # have a base either, so ignore the whole subtree.
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        # Only apply task encryption when we are creating the target dataset.
        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, include_intermediate, encryption))

    # Execute the plan only after every dataset was validated and counted.
    for step_template, incremental_base, snapshots, include_intermediate, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, include_intermediate, encryption, observer)
        handle_readonly(step_template)