def _json_read(self, fn: str, load_func: Callable) -> Dict[int, Node]:
    """
    Shared reader for the JSON-style backends, kept in one place so the
    logic can be changed for all of them in any particular run.

    fn        : path of the file to read (opened in text mode)
    load_func : callable taking the open file object and returning the
                decoded payload (e.g. json.load)

    Rebuilds self.id_dict from the payload and returns it.
    """
    with open(fn, "r") as handle:
        nodes = {}
        self.id_dict = nodes
        if not self.json_dict_list:
            # Payload is the serialized id_dict: every node was written as
            # an ordered list of values, so rebuild positionally and key
            # on the first element.
            for row in load_func(handle).values():
                nodes[row[0]] = Node._make(row)
        else:
            # Payload is a list of dicts; key names are included, which is
            # safer but slower.
            for record in load_func(handle):
                nodes[record['id']] = Node(**record)
    return self.id_dict
def make_list(path: Path) -> Node:
    """
    Build a Node for `path` from an ordered sequence of values handed to
    Node._make.

    The path is resolved strictly first, so it has to exist. Values are
    supplied positionally in this order: inode, "Directory"/"File" tag,
    name, parent inode, stem, extension (leading dot dropped), absolute
    path string, size, uid, gid, ctime/atime/mtime truncated to int, and
    the last three octal digits of st_mode (owner, group, other).
    """
    resolved = path.resolve(strict=True)
    info = resolved.stat()

    kind = "Directory" if resolved.is_dir() else "File"
    folder = resolved.parent
    parent_inode = folder.stat().st_ino if folder else None

    # Octal text of the mode; its last three characters are the
    # owner / group / other permission digits.
    mode_text = oct(info.st_mode)

    values = (
        info.st_ino,
        kind,
        resolved.name,
        parent_inode,
        resolved.stem,
        resolved.suffix[1:],
        f"{resolved.absolute()}",
        info.st_size,
        info.st_uid,
        info.st_gid,
        int(info.st_ctime),
        int(info.st_atime),
        int(info.st_mtime),
        int(mode_text[-3]),
        int(mode_text[-2]),
        int(mode_text[-1]),
    )
    return Node._make(values)
def make_named(path: Path) -> Node:
    """
    Build a Node for `path`, passing every field by keyword.

    The path is resolved with strict=True, so a missing path raises
    instead of producing a Node.

    Node._make accepts one positional iterable and rejects keyword
    arguments with a TypeError, so the keyword form has to go through the
    Node constructor itself.
    """
    p = path.resolve(strict=True)
    stats = p.stat()
    # Octal text of the mode; its last three characters are the
    # owner / group / other permission digits.
    mode_digits = oct(stats.st_mode)
    return Node(
        id=stats.st_ino,
        tag="Directory" if p.is_dir() else "File",
        name=p.name,
        # NOTE(review): Path objects are always truthy, so the None branch
        # looks unreachable - confirm whether the root should get None.
        parent_id=p.parent.stat().st_ino if p.parent else None,
        stem=p.stem,
        extension=p.suffix[1:],  # omit the leading dot
        path=f"{p.absolute()}",
        size=stats.st_size,
        owner=stats.st_uid,
        group=stats.st_gid,
        created=int(stats.st_ctime),
        accessed=int(stats.st_atime),
        modified=int(stats.st_mtime),
        owner_perm=int(mode_digits[-3]),
        group_perm=int(mode_digits[-2]),
        other_perm=int(mode_digits[-1]),
    )
def make_dict(path: Path) -> Node:
    """
    Build a Node for `path` by first collecting the fields in a dict.

    The path is resolved with strict=True, so a missing path raises
    instead of producing a Node.

    The dict is unpacked into the Node constructor. Node._make takes one
    positional iterable and raises TypeError on keyword arguments, so it
    cannot be used with **data.
    """
    p = path.resolve(strict=True)
    stats = p.stat()
    # Octal text of the mode; its last three characters are the
    # owner / group / other permission digits.
    mode_digits = oct(stats.st_mode)
    data = {
        "tag": "Directory" if p.is_dir() else "File",
        "id": stats.st_ino,
        # NOTE(review): Path objects are always truthy, so the None branch
        # looks unreachable - confirm whether the root should get None.
        "parent_id": p.parent.stat().st_ino if p.parent else None,
        "name": p.name,
        "stem": p.stem,
        "extension": p.suffix[1:],  # omit the leading dot
        "path": f"{p.absolute()}",
        "size": stats.st_size,
        "created": int(stats.st_ctime),
        "accessed": int(stats.st_atime),
        "modified": int(stats.st_mtime),
        "owner": stats.st_uid,
        "group": stats.st_gid,
        "owner_perm": int(mode_digits[-3]),
        "group_perm": int(mode_digits[-2]),
        "other_perm": int(mode_digits[-1]),
    }
    return Node(**data)
def read(self) -> Union[Dict, TreeNode]:
    """
    I return the best representation the source format supports

    pickle: whatever object was pickled (expected: TreeNode), also stored
            on self.treenode
    else  : Dict[inode -> properties], also stored on self.id_dict

    The file to read comes from self._path(); the decoder is selected by
    self.filetype. If self.filetype matches none of the FileType members
    handled below, nothing is read and None is returned implicitly.
    """
    fn = self._path()
    if self.filetype == FileType.PICKLE:
        # NOTE(review): pickle.load can execute arbitrary code from the
        # file - only read pickles that come from a trusted source.
        with open(fn, "rb") as f:
            self.treenode = pickle.load(f)
        return self.treenode
    elif self.filetype == FileType.CSV:
        self.id_dict = {}
        with open(fn, "r") as f:
            # NamedTuple._field_types was removed in Python 3.9;
            # __annotations__ carries the same field -> type mapping.
            field_types = getattr(Node, "_field_types", None)
            if field_types is None:
                field_types = Node.__annotations__
            # csv yields every value as str, so each non-str field is
            # converted with int(). The list does not depend on the row,
            # so it is computed once instead of once per line.
            int_fields = [k for k, v in field_types.items() if v != str]
            for line in csv.DictReader(f):
                # type conversion
                for field in int_fields:
                    line[field] = int(line[field])
                self.id_dict[int(line['id'])] = Node(**line)
        return self.id_dict
    elif self.filetype == FileType.MSGPACK:
        # TODO: This will fail with larger files - have to adjust max_xxx_len
        with open(fn, "rb") as f:
            self.id_dict = {}
            for item in msgpack.unpack(f, raw=False):
                self.id_dict[item['id']] = Node(**item)
        return self.id_dict
    elif self.filetype == FileType.JSON:
        return self._json_read(fn, json.load)
    elif self.filetype == FileType.UJSON:
        return self._json_read(fn, ujson.load)
    elif self.filetype == FileType.SIMPLEJSON:
        # NOTE: simplejson includes key names when serializing NamedTuples,
        # so both layouts are rebuilt by keyword rather than by position.
        with open(fn, "r") as f:
            self.id_dict = {}
            if self.json_dict_list:
                for item in simplejson.load(f):
                    self.id_dict[item['id']] = Node(**item)
            else:
                for v in simplejson.load(f).values():
                    self.id_dict[v['id']] = Node(**v)
        return self.id_dict
    elif self.filetype == FileType.CBOR2:
        with open(fn, "rb") as f:
            self.id_dict = {}
            for item in cbor2.load(f):
                self.id_dict[item['id']] = Node(**item)
        return self.id_dict
    elif self.filetype == FileType.CBOR:
        with open(fn, "rb") as f:
            self.id_dict = {}
            for item in cbor.load(f):
                self.id_dict[item['id']] = Node(**item)
        return self.id_dict
    elif self.filetype == FileType.RAPIDJSON:
        self.id_dict = {}
        with open(fn, "r") as f:
            d = rapidjson.Decoder(number_mode=rapidjson.NM_NATIVE)(f)
            if self.json_dict_list:
                for item in d:
                    # safer cause key names are included, but slower
                    self.id_dict[item['id']] = Node(**item)
            else:
                # list(self.id_dict.values()) - produces a list of lists,
                # so each item is positional with the id first
                for item in d:
                    self.id_dict[item[0]] = Node._make(item)
        return self.id_dict
    elif self.filetype == FileType.BSON:
        self.id_dict = {}
        with open(fn, "rb") as f:
            for doc in decode_file_iter(f):
                self.id_dict[doc['id']] = Node(**doc)
        return self.id_dict