def __list(self, parent: CDirTree, do_hash=False):
    """Recursively fill ``parent`` with the entries the backing fs lists under it.

    Works in three passes over the direct children of ``parent``:

    1. Turn every listed name into a ``CFile`` (with mtime and size) or a ``CDir``.
       No hashing happens here, because a child may still end up ignored by a
       fs matcher config file found in the same directory.
    2. On a real fs, read every fs matcher config file among the children and
       register the resulting matcher for ``parent`` on the matcher root group.
    3. Add every child the matcher root group says to include; files are hashed
       first when ``do_hash`` is set, and directories are descended into.

    :param parent: the tree node whose children are listed and populated.
    :param do_hash: when true, included files are stored as ``CFileHashed``.
    """
    assert isinstance(parent, CDirTree)
    entry_names: List[str] = self.__fs.listdir(parent)
    base_names: Tuple[str, ...] = parent.names

    # Pass 1: names -> cpaths (unhashed on purpose, see docstring).
    children = []
    for entry_name in entry_names:
        names = (*base_names, entry_name)
        probe = CPath(names)
        if not self.__fs.is_file(probe):
            children.append(CDir(names))
            continue
        mtime = self.__fs.getmtime(probe)
        size = self.__fs.getsize(probe)
        children.append(CFile(names, mtime, size))

    # Pass 2: look for fs matcher config files among the children.
    for child in children:
        if not (self.__fs.is_real_fs() and child.is_file()):
            continue
        if not FsMatcherMapper.exists(child.name):
            continue
        # check includer or excluder
        # TODO: read content of fs matcher files (e.g. gitignore) and then match further relative to that.
        # Build a fs matcher from the file content and attach it to the parent.
        with self.__fs.open(child, "r", encoding="utf-8") as matcher_file:
            content = matcher_file.read()
        fs_matcher: AbcFsMatcher = FsMatcherMapper.get(child.name)(content)
        # The group decides where to place an includer and where an excluder;
        # the root group forwards to the appropriate matcher group.
        self.__fs_matcher_root_group.add(fs_matcher, parent)

    # Pass 3: list & hash whatever is not ignored.
    for child in children:
        if not self.__fs_matcher_root_group.should_include(child):
            continue
        if child.is_file() and do_hash:
            hash_value = self.__fs.gethash(child)
            child = CFileHashed(child.names, child.mtime, child.size, hash_value)
        if child.is_dir():
            self.__list(parent.add(child), do_hash=do_hash)
        else:
            parent.add(child)
def test_exists(self):
    """``exists`` is expected to take names relative to the tree, not absolute ones."""
    tree1 = CDirTree("a/b/c")
    tree1.add(CDir('a/b/c/d/e/f'))

    absolute_names = ('a', 'b')
    relative_names = ('d', )

    # Notice this FALSE: the tree's own leading names are not found by exists().
    self.assertFalse(tree1.exists(absolute_names))
    self.assertTrue(tree1.exists(relative_names))
def test_to_dict__to_json_equal(self):
    """``to_dict`` and ``to_json`` must describe the same tree."""
    root_tree1 = CDirTree()
    for entry in (
        CFile("dir1/a.txt", 1, 2),
        CDir("dir2/"),
        CDir("dir3/p/q/r"),
        CFile("dir3/p/x.txt", 1, 5),
    ):
        root_tree1.add(entry)

    # Round-trip the dict through JSON to remove the tuple/list inconsistency.
    from_dict = json.loads(json.dumps(root_tree1.to_dict()))
    from_json = json.loads(root_tree1.to_json())
    self.assertEqual(from_dict, from_json)
def test_get_descendant_cpaths(self):
    """Four added cpaths are expected to yield eight descendant cpaths."""
    root_tree = CDirTree()
    for entry in (
        CFile("dir1/a.txt", 1, 2),
        CDir("dir2/"),
        CDir("dir3/p/q/r"),
        CFile("dir3/p/x.txt", 1, 5),
    ):
        root_tree.add(entry)

    descendants = root_tree.get_descendant_cpaths()
    self.assertEqual(len(descendants), 8)
def test_get_children(self):
    """``get_children`` is expected to return the three top-level sub trees."""
    root_tree = CDirTree()
    for entry in (
        CFile("dir1/a.txt", 1, 2),
        CDir("dir2/"),
        CDir("dir3/p/q/r"),
        CFile("dir3/p/x.txt", 1, 5),
    ):
        root_tree.add(entry)

    children = root_tree.get_children()
    first_child = children[0]
    self.assertIsInstance(first_child, CDirTree)
    self.assertEqual(3, len(children))
def test_as_cdir(self):
    """A tree is a ``CDir``; ``as_cdir`` gives a plain ``CDir``; names stay absolute."""
    empty_tree = CDirTree("")
    self.assertIsInstance(empty_tree, CDir)
    plain_cdir = empty_tree.as_cdir
    self.assertIs(plain_cdir.__class__, CDir)
    self.assertEqual((), empty_tree.names)

    rooted_tree = CDirTree("a/b/c")
    e_subtree = rooted_tree.add(CDir('a/b/c/d/e'))
    expected_names = ('a', 'b', 'c', 'd', 'e')
    self.assertEqual(expected_names, e_subtree.names)
def test_is_sub(self):
    """A freshly built tree is not a sub tree; a tree returned by ``add`` is."""
    standalone = CDirTree("")
    self.assertFalse(standalone.is_sub())

    owner = CDirTree("")
    subtree = owner.add(CDir("a/b/c"))
    self.assertTrue(subtree.is_sub())

    # Let's check if the rule of the universe still holds
    expected_names = ('a', 'b', 'c')
    self.assertEqual(expected_names, subtree.names)
def test_add(self):
    """``add`` returns the deepest sub tree and creates the intermediate dirs."""
    tree1 = CDirTree("a/b/c")
    e_subtree = tree1.add(CDir('a/b/c/d/e'))
    self.assertEqual(e_subtree.path, 'a/b/c/d/e/')

    # The intermediate directory d can be fetched with or without a trailing slash.
    d_with_slash = tree1.get(CPath("a/b/c/d/"))
    self.assertEqual(('a', 'b', 'c', 'd'), d_with_slash.names)

    d_without_slash = tree1.get(CPath("a/b/c/d"))
    self.assertIsNotNone(d_without_slash)

    d_as_cdir = tree1.get(CDir("a/b/c/d"))
    self.assertIsNotNone(d_as_cdir)

    # Asking for the same path as a file must not match the directory.
    d_as_cfile = tree1.get(CFile("a/b/c/d", 1, 1))
    self.assertIsNone(d_as_cfile)
def test_get(self):
    """``get`` finds intermediate dirs and honours ``path_type_aware``."""
    tree1 = CDirTree("a/b/c")
    f_subtree = tree1.add(CDir('a/b/c/d/e/f'))
    self.assertEqual(('a', 'b', 'c', 'd', 'e', 'f'), f_subtree.names)

    d_dir = tree1.get(CPath('a/b/c/d/'))
    self.assertEqual('a/b/c/d/', d_dir.path)

    # path type aware/unaware
    found_by_plain_cpath = tree1.get(CPath('a/b/c/d'))
    self.assertIsNotNone(found_by_plain_cpath)

    found_type_unaware = tree1.get(CFile('a/b/c/d', 1, 1), path_type_aware=False)
    self.assertIsNotNone(found_type_unaware)

    found_type_aware = tree1.get(CFile('a/b/c/d', 1, 1))
    self.assertIsNone(found_type_aware)
class DictMetaFileSystemBackend(BaseMetaFsBackendContract):
    """Meta file-system backend that answers queries from a cpath tree dict.

    The constructor reads the ``'cpaths'`` entry of the given dict, visits it
    with ``visit_fs_dictz`` and stores every visited entry in an internal
    ``CDirTree``. All query methods (``exists``, ``is_file``, ``listdir``,
    ``getmtime``, ``getsize``, ``gethash``) are answered from that tree; no
    real file system is touched.
    """

    def __init__(self, cpath_tree_dict: dict):
        """Build the internal tree from ``cpath_tree_dict['cpaths']``.

        Each visited entry must have ``'type'`` equal to ``'DIR'`` or
        ``'FILE'`` and a ``'names'`` key; file entries also need ``'mtime'``
        and ``'size'``, and may carry a ``'hash'``.
        """
        self.__cpath_tree_dict = cpath_tree_dict
        self.__dict_list = self.__cpath_tree_dict['cpaths']
        self.__cdir_tree = CDirTree()
        self.__base_path = None

        # build the cdir tree
        def json_visitor_callable(path_dict):
            if path_dict['type'] == 'DIR':
                cpath = CDir(path_dict['names'])
            else:
                assert path_dict['type'] == 'FILE'
                if path_dict.get('hash', None) is None:
                    cpath = CFile(path_dict['names'], path_dict['mtime'], path_dict['size'])
                else:
                    cpath = CFileHashed(path_dict['names'], path_dict['mtime'], path_dict['size'],
                                        path_dict['hash'])
            self.__cdir_tree.add(cpath)

        visit_fs_dictz(self.__dict_list, json_visitor_callable)

    def _get_dict_list(self):
        """
        Currently used for testing purpose only.
        """
        return tuple(self.__dict_list)

    def set_base_path(self, base_path: str):
        """Store ``base_path`` and return ``self`` so calls can be chained."""
        # Should I restrict to setting only once???
        self.__base_path = base_path
        return self

    @property
    def base_path(self):
        """The base path set via ``set_base_path``; asserts that one was set."""
        bp = self.__base_path
        assert bp is not None
        return bp

    def _full_path(self, cpath: CPath):
        """Join the base path with the path of ``cpath``."""
        return os.path.join(self.base_path, cpath.path)

    def exists(self, cpath: CPath):
        """Does not care what you pass, cdir or cfile - it does not check the type of the path"""
        inside_cpath = self.__cdir_tree.get(cpath, path_type_aware=False)
        return inside_cpath is not None

    def is_file(self, cpath: CPath):
        """Return whether the stored entry for ``cpath`` is a file.

        :raises CFSPathDoesNotExistException: when the tree has no entry for ``cpath``.
        """
        inside_cpath = self.__cdir_tree.get(cpath)
        if inside_cpath is None:
            raise CFSPathDoesNotExistException(
                f'Meta File System Error (occurred during checking if the cpath is a file cpath {cpath}):\n'
                f'X path does not exist')
        return inside_cpath.is_file()

    def is_dir(self, cpath: CPath):
        """Negation of ``is_file``; raises the same exception for a missing path."""
        return not self.is_file(cpath)

    def listdir(self, cpath: CPath) -> List[str]:
        """Return the names of the direct children stored under ``cpath``.

        A ``cpath`` with no names refers to the root of the internal tree.
        Returns an empty list when no sub tree is found for ``cpath``.
        """
        if cpath.names_count > 0:
            sub_tree = self.__cdir_tree.get_sub_tree(cpath)
        else:
            sub_tree = self.__cdir_tree
        if sub_tree is None:
            # TODO: throw exceptions accordingly - raise CFSException with
            #  'X Path does not exist' when the path is missing, and with
            #  'X Cannot list on file' when the path is a file.
            return []
        return [child.name for child in sub_tree.get_children_cpaths()]

    def getmtime(self, cpath: CPath):
        """Return the stored mtime of the file at ``cpath``.

        :raises CFSException: when the path is missing, or when either the
            argument or the stored entry is a directory.
        """
        inside_cpath = self.__cdir_tree.get(cpath)
        if inside_cpath is None:
            raise CFSException(
                f'File System Error (occurred during getmtime on cpath: {cpath}):\n'
                f'X Path does not exist')
        if inside_cpath.is_dir() or cpath.is_dir():
            raise CFSException(
                f'File System Error (occurred during getmtime on cpath: {cpath}):\n'
                f'X gmtime on dir')
        return inside_cpath.mtime

    def getsize(self, cpath: CPath):
        """Return the stored size of the file at ``cpath``.

        :raises CFSException: when the path is missing, or when either the
            argument or the stored entry is a directory.
        """
        inside_cpath = self.__cdir_tree.get(cpath)
        if inside_cpath is None:
            raise CFSException(
                f'File System Error (occurred during getsize on cpath: {cpath}):\n'
                f'X Path not found')
        if cpath.is_dir() or inside_cpath.is_dir():
            raise CFSException(
                f'File System Error (occurred during getsize on cpath: {cpath}):\n'
                f'X getsize on dir')
        return inside_cpath.size

    def gethash(self, cpath: CPath):
        """Return the stored hash of the file at ``cpath``.

        The hash is read from the entry stored in the tree, not from the
        argument, so callers may pass an unhashed file cpath.

        :raises CFSException: when the path is missing, when the argument or
            the stored entry is not a file, or when the stored file entry
            carries no hash.
        """
        inside_cpath = self.__cdir_tree.get(cpath)
        if inside_cpath is None:
            raise CFSException(
                f'File System Error (occurred during gethash on cpath: {cpath}):\n'
                f'X Path not found')
        if not cpath.is_file() or not inside_cpath.is_file():
            raise CFSException(
                f'File System Error (occurred during gethash on cpath: {cpath}):\n'
                f'get hash can only be used on Files')
        if not isinstance(inside_cpath, CFileHashed):
            raise CFSException(
                f'File System Error (occurred during gethash on cpath: {cpath}):\n'
                f'X file was not hashed')
        # The stored entry was just verified to be a CFileHashed; the argument
        # was not, so its hash (if any) must not be trusted.
        return inside_cpath.hash

    def is_real_fs(self):
        """Always false: this backend is not a real fs."""
        return False
def test_visit(self):
    """Exercise ``visit``: leaf flag, visiting order and number of visited cpaths."""
    root_tree = CDirTree()
    for entry in (
        CFile("dir1/a.txt", 1, 2),
        CDir("dir2/"),
        CDir("dir3/p/q/r"),
        CFile("dir3/p/x.txt", 1, 5),
    ):
        root_tree.add(entry)

    def collecting_visitor(sink):
        # Build a visitor that records every visited cpath into ``sink``.
        def visitor(cpath, is_leaf, tree):
            sink.append(cpath)
        return visitor

    # dir2 is a leaf dir
    dir2_is_leaf: Union[bool, None] = None

    def remember_dir2_leaf_flag(cpath, is_leaf, tree):
        nonlocal dir2_is_leaf
        if cpath.equals_by_path_only(CPath("dir2/")):
            dir2_is_leaf = is_leaf

    root_tree.visit(remember_dir2_leaf_flag)
    self.assertTrue(dir2_is_leaf)

    # non depth first visit: the first cpath is dir1, the last is dir3/p/x.txt
    non_depth_first_paths = []
    root_tree.visit(collecting_visitor(non_depth_first_paths), False)
    first_visited = non_depth_first_paths[0]
    last_visited = non_depth_first_paths[-1]
    self.assertTrue(first_visited.equals(CDir("dir1/")))
    self.assertTrue(last_visited.equals(CFile("dir3/p/x.txt", 1, 5)))

    # depth first visit, the first cpath is dir1/a.txt
    depth_first_paths = []
    root_tree.visit(collecting_visitor(depth_first_paths), True)
    self.assertTrue(depth_first_paths[0].equals(CFile("dir1/a.txt", 1, 2)))

    # length of paths
    counted_paths = []
    root_tree.visit(collecting_visitor(counted_paths), False)
    # Before bug fix the visit produced:
    #   dir1/a.txt, dir1/, dir2/, dir3/p/q/r/, dir3/p/q/, dir3/p/, dir3/,
    #   dir3/p/x.txt, dir3/p/
    # Count: 9, instead of 8 - dir3/p/ occurred two times due to having
    # a file inside it and a sub dir.
    self.assertEqual(8, len(counted_paths))
    expected_paths = {
        "dir1/",
        "dir1/a.txt",
        "dir2/",
        "dir3/p/",
        "dir3/p/x.txt",
        "dir3/p/q/",
        "dir3/p/q/r/",
        "dir3/"
    }
    self.assertEqual(expected_paths, set(cpath.path for cpath in counted_paths))
def test_diff(self):
    """Diff two trees: ``dir1/a.txt`` is modified, ``dir2/`` is deleted, nothing is new."""
    root_tree1 = CDirTree()
    root_tree1.add(CFile("dir1/a.txt", 1, 2))
    root_tree1.add(CDir("dir2/"))
    root_tree1.add(CDir("dir3/p/q/r"))
    root_tree1.add(CFile("dir3/p/x.txt", 1, 5))

    root_tree2 = CDirTree()
    # Same path as in tree 1 but with a different mtime and size -> modified.
    root_tree2.add(CFile("dir1/a.txt", 11, 55))
    # dir2/ is left out on purpose -> deleted.
    root_tree2.add(CDir("dir3/p/q/r"))
    root_tree2.add(CFile("dir3/p/x.txt", 1, 5))

    diff = root_tree1.diff(root_tree2)

    self.assertIsNotNone(diff.modified.get(CPath("dir1/a.txt")))
    # assertEqual, not assertTrue: assertTrue(x, 1) treats 1 as the failure
    # message and passes for any non-zero length.
    self.assertEqual(1, len(diff.modified.get_children_cpaths()))

    # NOTE(review): this is only meaningful if `is_empty` is a property; if it
    # is a method the bound method is always truthy - confirm.
    self.assertTrue(diff.new.is_empty)

    self.assertIsNotNone(diff.deleted.get(CPath("dir2/")))
    self.assertEqual(1, len(diff.deleted.get_children_cpaths()))
def test_get_children_cpaths(self):
    """Adding ``dir1/a.txt`` must expose ``dir1/`` as a ``CDir`` child of the root."""
    root_tree = CDirTree()
    root_tree.add(CFile("dir1/a.txt", 1, 2))

    first_child = root_tree.get_children_cpaths()[0]
    self.assertEqual(first_child.path, "dir1/")
    self.assertIsInstance(first_child, CDir)
def setUp(self) -> None:
    """Build two trees that differ by one missing dir and one extra file, and diff them."""
    before = CDirTree()
    for entry in (
        CFile("d1/a.txt", 1, 2),
        CDir("d2/"),
        CDir("d3/p/q/r"),
        CFile("d3/p/x.txt", 1, 5),
    ):
        before.add(entry)

    after = CDirTree()
    for entry in (
        CFile("d1/a.txt", 1, 2),
        # d2/ is left out on purpose - deleted
        CDir("d3/p/q/r"),
        CFile("d3/p/x.txt", 1, 5),
        CFile("d3/p/y.txt", 1, 5),  # new
    ):
        after.add(entry)

    # TODO: test in real file system too, with hash
    self.root_tree1 = before
    self.root_tree2 = after
    self.diff = self.root_tree1.diff(after)