def build_tree(self, share_uuid, root_uuid):
    """Builds and returns a tree representing the metadata for the given
    subtree in the given share.

    @param share_uuid: the share UUID or None for the user's volume
    @param root_uuid: the root UUID of the subtree (must be a directory)
    @return: a MergeNode tree
    @raise ValueError: if the server has no content hash for root_uuid
    """
    root = MergeNode(node_type=DIRECTORY, uuid=root_uuid)

    @log_timing
    @inlineCallbacks
    def _get_root_content_hash():
        """Obtain the content hash for the root node.

        Yields None when the server reports no hash for root_uuid.
        """
        result = yield self._get_node_hashes(share_uuid)
        returnValue(result.get(root_uuid, None))

    # defer_from_thread bridges from this (worker) thread into the
    # reactor thread and blocks for the deferred's result —
    # NOTE(review): presumably; confirm against its definition.
    root.content_hash = self.defer_from_thread(_get_root_content_hash)
    if root.content_hash is None:
        # A directory with no content hash cannot be compared/merged.
        raise ValueError("No content available for node %s" % root_uuid)

    @log_timing
    @inlineCallbacks
    def _get_children(parent_uuid, parent_content_hash):
        """Obtain a sequence of MergeNodes corresponding to a node's
        immediate children.
        """
        entries = yield self._get_dir_entries(share_uuid, parent_uuid)
        children = {}
        for entry in entries:
            # Skip entries the sync policy excludes (e.g. ignored names).
            if should_sync(entry.name):
                child = MergeNode(node_type=entry.node_type,
                                  uuid=uuid.UUID(entry.node))
                children[entry.name] = child
        # Fetch all hashes for the share in one round trip, then
        # annotate each child; absent entries get None.
        content_hashes = yield self._get_node_hashes(share_uuid)
        for child in children.values():
            child.content_hash = content_hashes.get(child.uuid, None)
        returnValue(children)

    # Iterative depth-first expansion of directories; a worklist avoids
    # recursion and keeps all blocking calls on this thread.
    need_children = [root]
    while need_children:
        node = need_children.pop()
        # Nodes without a content hash have no retrievable content;
        # leave their children unset.
        if node.content_hash is not None:
            children = self.defer_from_thread(_get_children, node.uuid,
                                              node.content_hash)
            node.children = children
            for child in children.values():
                if child.node_type == DIRECTORY:
                    need_children.append(child)
    return root
def scan_directory(path, display_path="", quiet=False): """Scans a local directory and builds an in-memory tree from it.""" if display_path != "" and not quiet: print display_path link_target = None child_names = None try: link_target = os.readlink(path) except OSError as e: if e.errno != EINVAL: raise try: child_names = os.listdir(path) except OSError as e: if e.errno != ENOTDIR: raise if link_target is not None: # symlink sum = hashlib.sha1() sum.update(link_target) content_hash = "sha1:%s" % sum.hexdigest() return MergeNode(node_type=SYMLINK, content_hash=content_hash) elif child_names is not None: # directory child_names = [ n for n in child_names if should_sync(n.decode("utf-8")) ] child_paths = [(os.path.join(path, child_name), os.path.join(display_path, child_name)) for child_name in child_names] children = [ scan_directory(child_path, child_display_path, quiet) for (child_path, child_display_path) in child_paths ] unicode_child_names = [n.decode("utf-8") for n in child_names] children = dict(zip(unicode_child_names, children)) return MergeNode(node_type=DIRECTORY, children=children) else: # regular file sum = hashlib.sha1() class HashStream(object): """Stream that computes hashes.""" def write(self, bytes): """Accumulate bytes.""" sum.update(bytes) with open(path, "r") as stream: shutil.copyfileobj(stream, HashStream()) content_hash = "sha1:%s" % sum.hexdigest() return MergeNode(node_type=FILE, content_hash=content_hash)
def test_clobber(self):
    """Tests clobbering merges."""
    def file_node(content_hash):
        """Build a FILE leaf carrying the given dummy content hash."""
        return MergeNode(FILE, content_hash=content_hash)

    server_tree = MergeNode(DIRECTORY, children={
        'foo': file_node("dummy:abc"),
        'bar': file_node("dummy:xyz"),
        'baz': file_node("dummy:aaa"),
    })
    local_tree = MergeNode(DIRECTORY, children={
        'foo': file_node("dummy:cde"),
        'bar': file_node("dummy:zyx"),
        'hoge': file_node("dummy:bbb"),
    })

    # Clobbering the server side should leave the local state as-is.
    clobber_server_result = merge_trees(local_tree, local_tree,
                                        server_tree, server_tree,
                                        ClobberServerMerge())
    self.assertEqual(local_tree, clobber_server_result)

    # Clobbering the local side should yield the server state.
    clobber_local_result = merge_trees(local_tree, local_tree,
                                       server_tree, server_tree,
                                       ClobberLocalMerge())
    self.assertEqual(server_tree, clobber_local_result)
def merge_node(self, old_local_node, local_node, old_remote_node,
               remote_node, node_type, merged_children):
    """Copy the remote node."""
    merged = None
    if remote_node is not None:
        # Mirror the remote node's identity and content, but attach the
        # already-merged children instead of the remote's own.
        merged = MergeNode(node_type=node_type,
                           uuid=remote_node.uuid,
                           content_hash=remote_node.content_hash,
                           children=merged_children)
    return merged
def _get_children(parent_uuid, parent_content_hash):
    """Obtain a sequence of MergeNodes corresponding to a node's
    immediate children.

    @param parent_uuid: UUID of the directory whose entries to fetch
    @param parent_content_hash: content hash of the parent (unused here)
    @return: (via returnValue) dict mapping entry name -> MergeNode

    NOTE(review): `self` and `share_uuid` are free variables — this
    function appears to be a closure nested inside a method; confirm
    against the enclosing definition.
    """
    entries = yield self._get_dir_entries(share_uuid, parent_uuid)
    children = {}
    for entry in entries:
        # Skip entries the sync policy excludes.
        if should_sync(entry.name):
            child = MergeNode(node_type=entry.node_type,
                              uuid=uuid.UUID(entry.node))
            children[entry.name] = child
    # One bulk fetch of hashes for the share, then annotate each child;
    # children absent from the result get a None content hash.
    content_hashes = yield self._get_node_hashes(share_uuid)
    for child in children.values():
        child.content_hash = content_hashes.get(child.uuid, None)
    returnValue(children)
def test_generic_merge(self):
    """Tests that generic merge behaves as expected."""
    # Name each leaf so the expected structure below can reference the
    # exact same objects.
    foo_a = MergeNode(FILE, uuid=uuid.uuid4())
    bar_a = MergeNode(FILE, uuid=uuid.uuid4())
    tree_a = MergeNode(DIRECTORY,
                       children={'foo': foo_a, 'bar': bar_a},
                       uuid=uuid.uuid4())

    bar_b = MergeNode(FILE, uuid=uuid.uuid4())
    baz_b = MergeNode(FILE, uuid=uuid.uuid4())
    tree_b = MergeNode(DIRECTORY,
                       children={'bar': bar_b, 'baz': baz_b},
                       uuid=uuid.uuid4())

    result = generic_merge(trees=[tree_a, tree_b],
                           pre_merge=accumulate_path,
                           post_merge=capture_merge,
                           partial_parent="", name="ex")

    # Children are matched by name; a tree lacking a name contributes
    # None in that slot.
    expected_result = ([tree_a, tree_b], "ex", {
        'foo': ([foo_a, None], "ex/foo", {}),
        'bar': ([bar_a, bar_b], "ex/bar", {}),
        'baz': ([None, baz_b], "ex/baz", {}),
    })
    self.assertEqual(expected_result, result)
def merge_node(self, old_local_node, local_node, old_remote_node,
               remote_node, node_type, merged_children):
    """Copy local node and associate with remote uuid (if applicable)."""
    if local_node is None:
        return None
    # Keep the remote identity when one exists so the result can be
    # matched back to the server-side node.
    node_uuid = remote_node.uuid if remote_node is not None else None
    return MergeNode(node_type=local_node.node_type,
                     uuid=node_uuid,
                     content_hash=local_node.content_hash,
                     children=merged_children)
def merge_node(self, old_local_node, local_node, old_remote_node,
               remote_node, node_type, merged_children):
    """Performs bidirectional merge of node state.

    Compares old vs. current state on each side to classify the node as
    deleted/updated locally and/or on the server, then produces a merged
    MergeNode (or None for an agreed deletion).

    @param old_local_node: previous local state, or None if absent
    @param local_node: current local state, or None if absent
    @param old_remote_node: previous server state, or None if absent
    @param remote_node: current server state, or None if absent
    @param node_type: node type for the merged node
    @param merged_children: dict of already-merged children, or None
    @return: a MergeNode, or None if the node should not exist
    """
    def node_content_hash(node):
        """Returns node content hash if node is not None"""
        return node.content_hash if node is not None else None

    old_local_content_hash = node_content_hash(old_local_node)
    local_content_hash = node_content_hash(local_node)
    old_remote_content_hash = node_content_hash(old_remote_node)
    remote_content_hash = node_content_hash(remote_node)

    # A side "deleted" the node if it existed before and is now gone.
    locally_deleted = old_local_node is not None and local_node is None
    deleted_on_server = old_remote_node is not None and remote_node is None
    # updated means modified or created
    locally_updated = (not locally_deleted and
                       old_local_content_hash != local_content_hash)
    updated_on_server = (not deleted_on_server and
                         old_remote_content_hash != remote_content_hash)

    has_merged_children = (merged_children is not None and
                           len(merged_children) > 0)
    either_node_exists = local_node is not None or remote_node is not None
    # Honor a one-sided delete only if the other side did not update the
    # node in the meantime (delete vs. edit resolves in favor of edit).
    should_delete = ((locally_deleted and not updated_on_server) or
                     (deleted_on_server and not locally_updated))

    # Keep the node if it exists and should not be deleted, or if merged
    # children survived (a directory must persist to hold them).
    if (either_node_exists and not should_delete) or has_merged_children:
        if (node_type != DIRECTORY and
                locally_updated and updated_on_server and
                local_content_hash != remote_content_hash):
            # local_content_hash will become the merged content_hash;
            # save remote_content_hash in conflict info
            conflict_info = (str(uuid.uuid4()), remote_content_hash)
        else:
            conflict_info = None
        node_uuid = remote_node.uuid if remote_node is not None else None
        # Prefer the side that actually changed; fall back to whichever
        # hash is available.
        if locally_updated:
            content_hash = local_content_hash or remote_content_hash
        else:
            content_hash = remote_content_hash or local_content_hash
        return MergeNode(node_type=node_type, uuid=node_uuid,
                         children=merged_children,
                         content_hash=content_hash,
                         conflict_info=conflict_info)
    else:
        return None
def post_merge(nodes, partial_result, child_results):
    """Delete nodes.

    @param nodes: (merged_node, original_node) pair; merged_node None
        means the merge decided this node should be deleted
    @param partial_result: (path, display_path, node_uuid, synced)
        accumulated by the pre-merge pass
    @param child_results: dict of name -> merged child (or None)
    @return: the MergeNode reflecting post-delete state, or None

    NOTE(review): `sync_mode`, `quiet`, and DELETE_SYMBOL are free
    variables — this looks like a closure nested in a sync routine;
    confirm against the enclosing definition.
    """
    (merged_node, original_node) = nodes
    (path, display_path, node_uuid, synced) = partial_result
    if merged_node is None:
        # Merge says "delete"; the node must have existed before.
        assert original_node is not None
        if not quiet:
            print "%s %s %s" % (sync_mode.symbol, DELETE_SYMBOL,
                                display_path)
        try:
            if original_node.node_type == DIRECTORY:
                sync_mode.delete_directory(node_uuid=original_node.uuid,
                                           path=path)
            else:
                # files or symlinks
                sync_mode.delete_file(node_uuid=original_node.uuid,
                                      path=path)
            synced = True
        except NodeDeleteError as e:
            # Best-effort delete: report the failure and keep the
            # original node in the model below.
            print e
    # If the operation succeeded, the model follows the merge decision;
    # otherwise it keeps reflecting reality (the original node).
    if synced:
        model_node = merged_node
    else:
        model_node = original_node
    if model_node is not None:
        if model_node.node_type == DIRECTORY:
            # Drop children that were successfully deleted (None).
            child_iter = child_results.items()
            merged_children = dict(
                (name, child) for (name, child) in child_iter
                if child is not None)
        else:
            # if there are children here it's because they failed to delete
            merged_children = None
        return MergeNode(node_type=model_node.node_type,
                         uuid=model_node.uuid,
                         children=merged_children,
                         content_hash=model_node.content_hash)
    else:
        return None
def test_sync(self):
    """Test sync merges."""
    def leaf(content_hash):
        """Build a FILE node with the given dummy content hash."""
        return MergeNode(FILE, content_hash=content_hash)

    old_server_tree = MergeNode(DIRECTORY, children={})
    server_tree = MergeNode(DIRECTORY, children={
        'bar': leaf("dummy:xyz"),
        'baz': leaf("dummy:aaa"),
        'foo': leaf("dummy:abc"),
    })

    old_local_tree = MergeNode(DIRECTORY, children={})
    local_tree = MergeNode(DIRECTORY, children={
        'bar': leaf("dummy:xyz"),
        'foo': leaf("dummy:abc"),
        'hoge': leaf("dummy:bbb"),
    })

    # Sync merge should union both sides' additions.
    expected_tree = MergeNode(DIRECTORY, children={
        'bar': leaf("dummy:xyz"),
        'baz': leaf("dummy:aaa"),
        'foo': leaf("dummy:abc"),
        'hoge': leaf("dummy:bbb"),
    })

    merged_tree = merge_trees(old_local_tree, local_tree,
                              old_server_tree, server_tree, SyncMerge())
    self.assertEqual(merged_tree, expected_tree)