def tagsetses_from_path(cls, ont_path: str):
  ''' Return a `(tagsets,ont_pfx_map)` 2-tuple from `ont_path`,
      being the default `TagSets`
      and a mapping of name->`TagSets` for various subontologies.

      If `ont_path` resolves to a file the mapping will be empty;
      return an `SQLTags` if `ont_path` ends with `'.sqlite'`
      otherwise a `TagFile`.

      If `ont_path` resolves to a directory, scan the entries.
      An entry named *prefix*`.sqlite`
      adds a *prefix*->`SQLTags` entry to the mapping.
      An entry named *prefix*`.tags`
      adds a *prefix*->`TagFile` entry to the mapping.
      After the scan, `tagsets` is set from the entry
      whose prefix was `'_'`, or `None`.
  '''
  ont_pfx_map = {}
  if isfilepath(ont_path):
    if ont_path.endswith('.sqlite'):
      tagsets = SQLTags(ont_path)
    else:
      tagsets = TagFile(ont_path)
  elif isdirpath(ont_path):
    with Pfx("listdir(%r)", ont_path):
      for subont_name in os.listdir(ont_path):
        if not subont_name or subont_name.startswith('.'):
          continue
        subont_path = joinpath(ont_path, subont_name)
        with Pfx(subont_path):
          if not isfilepath(subont_path):
            warning("not a file")
            continue
          prefix = cutsuffix(subont_name, '.sqlite')
          if prefix is not subont_name:
            ont_pfx_map[prefix] = SQLTags(subont_path)
            continue
          prefix = cutsuffix(subont_name, '.tags')
          if prefix is not subont_name:
            ont_pfx_map[prefix] = TagFile(subont_path)
            continue
          warning("unsupported name, does not end in .sqlite or .tags")
          continue
    tagsets = ont_pfx_map.pop('_', None)
  else:
    if not ont_path.endswith('.sqlite'):
      ont_path_sqlite = ont_path + '.sqlite'
      if isfilepath(ont_path_sqlite):
        return cls.tagsetses_from_path(ont_path_sqlite)
    raise ValueError(f"unsupported ont_path={ont_path!r}")
  return tagsets, ont_pfx_map
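# Illustrative sketch (not part of the module): the directory scanning
# convention above, using only the stdlib, with a (kind, path) tuple standing
# in for the SQLTags/TagFile instances. scan_ontology_dir is a hypothetical
# name used only for this example.
import os
from os.path import join as joinpath

def scan_ontology_dir(dirpath):
  ''' Map *prefix* -> (kind, path) for entries named prefix.sqlite or
      prefix.tags, popping the '_' prefix out as the default.
  '''
  pfx_map = {}
  for name in sorted(os.listdir(dirpath)):
    if not name or name.startswith('.'):
      continue
    for suffix, kind in ('.sqlite', 'sqltags'), ('.tags', 'tagfile'):
      if name.endswith(suffix):
        prefix = name[:-len(suffix)]
        pfx_map[prefix] = (kind, joinpath(dirpath, name))
        break
  default = pfx_map.pop('_', None)
  return default, pfx_map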
def startup_shutdown(self):
  # Define the window's contents
  self.tree = PathListWidget(
      self.fspaths,
      key="paths",
      fixed_size=(200, None),
      expand_x=True,
      expand_y=True,
      show_expanded=True,
      pad=(3, 3),
  )
  self.pathview = PathView(tagger=self.tagger)
  layout = [
      [
          self.tree,
          self.pathview,
      ],
      [sg.Text("BAH")],
  ]
  self.window = sg.Window(
      str(self),
      layout,
      size=(2200, 1500),
      finalize=True,
  )
  if False:
    for record in self.tree:
      if isfilepath(record.fullpath):
        print("set self.fspath =", repr(record.fullpath))
        self.fspath = record.fullpath
        break
  print("window made")
  yield self
  print("closing")
  self.window.close()
def __init__(self, filename, dictclass=UUIDedDict, create=False):
  ''' Initialise the mapping.

      Parameters:
      * `filename`: the file containing the newline delimited JSON data;
        this need not yet exist
      * `dictclass`: an optional `dict` subclass to hold each record,
        default `UUIDedDict`
      * `create`: if true, ensure the file exists by transiently opening it
        for append if it is missing; default `False`
  '''
  if hasattr(self, '_lock'):
    return
  self.__ndjson_filename = filename
  self.__dictclass = dictclass
  if (create and not isfilepath(filename)
      and not isfilepath(filename + '.gz')):
    # make sure the file exists
    with gzifopen(filename, 'a'):  # pylint: disable=unspecified-encoding
      pass
  self.scan_errors = []
  self._lock = RLock()
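# Illustrative sketch (assumption: gzifopen above transparently opens either
# "filename" or "filename.gz"); a stdlib-only analogue for reading the
# newline delimited JSON records might look like this. read_ndjson is a
# hypothetical helper for this example only.
import gzip
import json
from os.path import isfile as isfilepath

def read_ndjson(filename):
  ''' Yield one dict per newline delimited JSON record,
      preferring the gzipped variant if only it exists.
  '''
  if not isfilepath(filename) and isfilepath(filename + '.gz'):
    f = gzip.open(filename + '.gz', 'rt', encoding='utf-8')
  else:
    f = open(filename, 'r', encoding='utf-8')
  with f:
    for line in f:
      line = line.strip()
      if line:
        yield json.loads(line)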
def cmd_linktree(self, argv):
  ''' Usage: {cmd} srctrees... dsttree
        Link media files from the srctrees into the dsttree
        using the Plex naming conventions.
  '''
  if len(argv) < 2:
    raise GetoptError("missing srctrees or dsttree")
  dstroot = argv.pop()
  srcroots = argv
  options = self.options
  fstags = options.fstags
  if not isdirpath(dstroot):
    raise GetoptError("dstroot does not exist: %s" % (dstroot,))
  for srcroot in srcroots:
    with Pfx(srcroot):
      # a single file srcroot is filed directly, otherwise walk the tree
      for filepath in ([srcroot]
                       if isfilepath(srcroot) else sorted(rfilepaths(srcroot))):
        with Pfx(filepath):
          plex_linkpath(fstags, filepath, dstroot)
def VTDStore(name, path, *, hashclass, preferred_indexclass=None):
  ''' Factory to return a `MappingStore` using a `BackingFile`
      using a single `.vtd` file.
  '''
  if hashclass is None:
    hashclass = DEFAULT_HASHCLASS
  with Pfx(path):
    if not path.endswith('.vtd'):
      warning("does not end with .vtd")
    if not isfilepath(path):
      raise ValueError("missing path %r" % (path,))
    pathbase, _ = splitext(path)
    index_basepath = f"{pathbase}-index-{hashclass.HASHNAME}"
    indexclass = choose_indexclass(
        index_basepath, preferred_indexclass=preferred_indexclass
    )
    binary_index = indexclass(index_basepath)
    index = BinaryHashCodeIndex(
        hashclass=hashclass,
        binary_index=binary_index,
        index_entry_class=BackingFileIndexEntry,
    )
    return MappingStore(
        name,
        CompressibleBackingFile(path, hashclass=hashclass, index=index),
        hashclass=hashclass,
    )
def get_store_spec(s, offset=0):
  ''' Get a single Store specification from a string.
      Return `(matched, type, params, offset)`
      being the matched text, store type, parameters and the new offset.

      Recognised specifications:
      * `"text"`: Quoted store spec, needed to enclose some of the
        following syntaxes if they do not consume the whole string.
      * `[clause_name]`: The name of a clause to be obtained from a Config.
      * `/path/to/something`, `./path/to/something`:
        A filesystem path to a local resource.
        Supported paths:
        - `.../foo.sock`: A UNIX socket based StreamStore.
        - `.../dir`: A DataDirStore directory.
        - `.../foo.vtd`: (STILL TODO): A VTDStore.
      * `|command`: A subprocess implementing the streaming protocol.
      * `store_type(param=value,...)`: A general Store specification.
      * `store_type:params...`: An inline Store specification.
        Supported inline types: `tcp:[host]:port`

      TODO:
      * `ssh://host/[store-designator-as-above]`:
      * `unix:/path/to/socket`:
        Connect to a daemon implementing the streaming protocol.
      * `http[s]://host/prefix`:
        A Store presenting content under prefix:
        + `/h/hashcode.hashtype`: Block data by hashcode
        + `/i/hashcode.hashtype`: Indirect block by hashcode.
      * `s3://bucketname/prefix/hashcode.hashtype`:
        An AWS S3 bucket with raw blocks.
  '''
  offset0 = offset
  if offset >= len(s):
    raise ValueError("empty string")
  if s.startswith('"', offset):
    # "store_spec"
    qs, offset = get_qstr(s, offset, q='"')
    _, store_type, params, offset2 = get_store_spec(qs, 0)
    if offset2 < len(qs):
      raise ValueError("unparsed text inside quotes: %r" % (qs[offset2:],))
  elif s.startswith('[', offset):
    # [clause_name]
    store_type = 'config'
    clause_name, offset = get_ini_clausename(s, offset)
    params = {'clause_name': clause_name}
  elif s.startswith('/', offset) or s.startswith('./', offset):
    path = s[offset:]
    offset = len(s)
    if path.endswith('.sock'):
      store_type = 'socket'
      params = {'socket_path': path}
    elif isdirpath(path):
      store_type = 'datadir'
      params = {'path': path}
    elif isfilepath(path):
      store_type = 'datafile'
      params = {'path': path}
    else:
      raise ValueError("%r: not a directory or a socket" % (path,))
  elif s.startswith('|', offset):
    # |shell command
    store_type = 'shell'
    params = {'shcmd': s[offset + 1:].strip()}
    offset = len(s)
  else:
    store_type, offset = get_identifier(s, offset)
    if not store_type:
      raise ValueError(
          "expected identifier at offset %d, found: %r" % (offset, s[offset:])
      )
    with Pfx(store_type):
      if s.startswith('(', offset):
        params, offset = get_params(s, offset)
      elif s.startswith(':', offset):
        offset += 1
        params = {}
        if store_type == 'tcp':
          colon2 = s.find(':', offset)
          if colon2 < offset:
            raise ValueError(
                "missing second colon after offset %d" % (offset,)
            )
          hostpart = s[offset:colon2]
          offset = colon2 + 1
          if not isinstance(hostpart, str):
            raise ValueError(
                "expected hostpart to be a string, got: %r" % (hostpart,)
            )
          if not hostpart:
            hostpart = 'localhost'
          params['host'] = hostpart
          portpart, offset = get_token(s, offset)
          params['port'] = portpart
        else:
          raise ValueError("unrecognised Store type for inline form")
      else:
        raise ValueError("no parameters")
  return s[offset0:offset], store_type, params, offset
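# Illustrative usage sketch (assumes get_store_spec and its helper parsers are
# importable from this module); the expected result shapes follow from the
# branches above, and the command string is an arbitrary example.
def _demo_store_specs():
  for spec in ('[cache]', '|some-command --serve', 'tcp::9999'):
    matched, store_type, params, offset = get_store_spec(spec)
    # e.g. '[cache]'               -> 'config', {'clause_name': 'cache'}
    #      '|some-command --serve' -> 'shell',  {'shcmd': 'some-command --serve'}
    #      'tcp::9999'             -> 'tcp',    host defaults to 'localhost',
    #                                           port parsed by get_token
    print(spec, '->', store_type, params)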
def cmd_autofile(self, argv):
  ''' Usage: {cmd} pathnames...
        Link pathnames to destinations based on their tags.
        -d  Treat directory pathnames like files - file the directory,
            not its contents.
            (TODO: we file by linking - this needs a rename.)
        -n  No link (default). Just print filing actions.
        -r  Recurse. Required to autofile a directory tree.
        -x  Remove the source file if linked successfully. Implies -y.
        -y  Link files to destinations.
  '''
  direct = False
  recurse = False
  no_link = True
  do_remove = False
  opts, argv = getopt(argv, 'dnrxy')
  for opt, _ in opts:
    with Pfx(opt):
      if opt == '-d':
        direct = True
      elif opt == '-n':
        no_link = True
        do_remove = False
      elif opt == '-r':
        recurse = True
      elif opt == '-x':
        no_link = False
        do_remove = True
      elif opt == '-y':
        no_link = False
      else:
        raise RuntimeError("unimplemented option")
  if not argv:
    raise GetoptError("missing pathnames")
  tagger = self.options.tagger
  fstags = tagger.fstags
  for path in argv:
    with Pfx(path):
      if direct or not isdirpath(path):
        self._autofile(path, tagger=tagger, no_link=no_link, do_remove=do_remove)
      elif not recurse:
        pfxprint("not autofiling directory, use -r for recursion")
      else:
        for subpath, dirnames, filenames in os.walk(path):
          with Pfx(subpath):
            # order the descent
            dirnames[:] = sorted(
                dname for dname in dirnames
                if dname and not dname.startswith('.')
            )
            tagged = fstags[subpath]
            if 'tagger.skip' in tagged:
              # prune this directory tree
              dirnames[:] = []
              continue
            for filename in sorted(filenames):
              with Pfx(filename):
                filepath = joinpath(subpath, filename)
                if not isfilepath(filepath):
                  pfxprint("not a regular file, skipping")
                  continue
                self._autofile(
                    filepath,
                    tagger=tagger,
                    no_link=no_link,
                    do_remove=do_remove,
                )
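# Illustrative sketch of the os.walk pruning pattern used above: sorting
# dirnames in place controls descent order, and clearing it prunes the whole
# subtree. should_skip is a hypothetical stand-in for the fstags
# 'tagger.skip' check; walk_regular_files exists only for this example.
import os
from os.path import join as joinpath, isfile as isfilepath

def walk_regular_files(topdirpath, should_skip=lambda dirpath: False):
  ''' Yield regular file paths below topdirpath, skipping dot directories
      and any directory for which should_skip(dirpath) is true.
  '''
  for dirpath, dirnames, filenames in os.walk(topdirpath):
    if should_skip(dirpath):
      dirnames[:] = []  # prune this directory tree
      continue
    dirnames[:] = sorted(
        dname for dname in dirnames if dname and not dname.startswith('.')
    )
    for filename in sorted(filenames):
      filepath = joinpath(dirpath, filename)
      if isfilepath(filepath):
        yield filepath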
def pngfor(path, max_size=None, *, min_size=None, cached=None, force=False):
  ''' Create a PNG version of the image at `path`,
      scaled to fit within some size constraints.
      Return the pathname of the PNG file.

      Parameters:
      * `max_size`: optional `(width,height)` tuple, default `(1920,1080)`
      * `min_size`: optional `(width,height)` tuple, default half of `max_size`
      * `cached`: optional mapping of `(path,'png',size)`->`pngof_path`
        where `size` is the chosen final size tuple
      * `force`: optional flag (default `False`) to force recreation
        of the PNG version and associated cache entry
  '''
  if max_size is None:
    max_size = 1920, 1080
  if min_size is None:
    min_size = max_size[0] // 2, max_size[1] // 2
  if cached is None:
    cached = _conv_cache
  tagged = _fstags[path]
  path = tagged.filepath
  size = image_size(path)
  if size is None:
    return None
  # choose a target size
  if size[0] > max_size[0] or size[1] > max_size[1]:
    scale = min(max_size[0] / size[0], max_size[1] / size[1])
    re_size = int(size[0] * scale), int(size[1] * scale)
    ##warning("too big, rescale by %s from %r to %r", scale, size, re_size)
    key = path, 'png', re_size
  elif size[0] < min_size[0] or size[1] < min_size[1]:
    scale = min(min_size[0] / size[0], min_size[1] / size[1])
    re_size = int(size[0] * scale), int(size[1] * scale)
    ##warning("too small, rescale by %s from %r to %r", scale, size, re_size)
    key = path, 'png', re_size
  else:
    re_size = None
    key = path, 'png', size
  cached_path = cached.get(key)
  if cached_path:
    return cached_path
  if tagged['pil.format'] == 'PNG' and re_size is None:
    # right format, same size - return ourself
    cached[key] = tagged.filepath
    return tagged.filepath
  # path to converted file
  hashcode = SHA256.from_pathname(path)
  pngbase = f'{hashcode}.png'
  if not isdirpath(CONVCACHE_ROOT):
    pfx_call(os.mkdir, CONVCACHE_ROOT)
  convsize = re_size or size
  convdirpath = joinpath(CONVCACHE_ROOT, f'png/{convsize[0]}x{convsize[1]}')
  if not isdirpath(convdirpath):
    pfx_call(os.makedirs, convdirpath)
  pngpath = joinpath(convdirpath, pngbase)
  if force or not isfilepath(pngpath):
    try:
      with Image.open(path) as im:
        if re_size is None:
          pfx_call(im.save, pngpath, 'PNG')
        else:
          im2 = im.resize(re_size)
          pfx_call(im2.save, pngpath, 'PNG')
    except UnidentifiedImageError as e:
      warning("unhandled image: %s", e)
      pngpath = None
  cached[key] = pngpath
  return pngpath
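# Illustrative sketch of the resize decision above: scale down to fit within
# max_size, scale up towards min_size, otherwise keep the original size.
# choose_target_size is a hypothetical name used only for this example.
def choose_target_size(size, max_size=(1920, 1080), min_size=None):
  ''' Return the target (width, height) tuple, or None if no resize is needed.
  '''
  if min_size is None:
    min_size = max_size[0] // 2, max_size[1] // 2
  if size[0] > max_size[0] or size[1] > max_size[1]:
    # too big: shrink to fit inside max_size
    scale = min(max_size[0] / size[0], max_size[1] / size[1])
  elif size[0] < min_size[0] or size[1] < min_size[1]:
    # too small: enlarge towards min_size
    scale = min(min_size[0] / size[0], min_size[1] / size[1])
  else:
    return None
  return int(size[0] * scale), int(size[1] * scale)

# e.g. choose_target_size((3840, 2160)) == (1920, 1080)
#      choose_target_size((800, 400)) == (960, 480)
#      choose_target_size((1280, 720)) is None (already within bounds)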
def _monitor_datafiles(self):
  ''' Thread body to poll the ideal tree for new or changed files.
  '''
  proxy = upd_state.proxy
  proxy.prefix = str(self) + " monitor "
  meta_store = self.meta_store
  filemap = self._filemap
  datadirpath = self.pathto('data')
  if meta_store is not None:
    topdir = self.topdir
  else:
    warning("%s: no meta_store!", self)
  updated = False
  disabled = False
  while not self.cancelled:
    sleep(self.DELAY_INTERSCAN)
    if self.flag_scan_disable:
      if not disabled:
        info("scan %r DISABLED", shortpath(datadirpath))
        disabled = True
      continue
    if disabled:
      info("scan %r ENABLED", shortpath(datadirpath))
      disabled = False
    # scan for new datafiles
    with Pfx("%r", datadirpath):
      seen = set()
      info("scan tree...")
      with proxy.extend_prefix(" scan"):
        for dirpath, dirnames, filenames in os.walk(datadirpath,
                                                    followlinks=True):
          dirnames[:] = sorted(dirnames)
          filenames = sorted(filenames)
          sleep(self.DELAY_INTRASCAN)
          if self.cancelled or self.flag_scan_disable:
            break
          rdirpath = relpath(dirpath, datadirpath)
          with Pfx(rdirpath):
            with (proxy.extend_prefix(" " + rdirpath)
                  if filenames else nullcontext()):
              # this will be the subdirectories into which to recurse
              pruned_dirnames = []
              for dname in dirnames:
                if self.exclude_dir(joinpath(rdirpath, dname)):
                  # unwanted
                  continue
                subdirpath = joinpath(dirpath, dname)
                try:
                  S = os.stat(subdirpath)
                except OSError as e:
                  # inaccessable
                  warning("stat(%r): %s, skipping", subdirpath, e)
                  continue
                ino = S.st_dev, S.st_ino
                if ino in seen:
                  # we have seen this subdir before, probably via a symlink
                  # TODO: preserve symlinks? attach alter ego directly as a Dir?
                  debug(
                      "seen %r (dev=%s,ino=%s), skipping",
                      subdirpath, ino[0], ino[1]
                  )
                  continue
                seen.add(ino)
                pruned_dirnames.append(dname)
              dirnames[:] = pruned_dirnames
              if meta_store is None:
                warning("no meta_store")
                D = None
              else:
                with meta_store:
                  D = topdir.makedirs(rdirpath, force=True)
                  # prune removed names
                  names = list(D.keys())
                  for name in names:
                    if name not in dirnames and name not in filenames:
                      info("del %r", name)
                      del D[name]
              for filename in filenames:
                with Pfx(filename):
                  if self.cancelled or self.flag_scan_disable:
                    break
                  rfilepath = joinpath(rdirpath, filename)
                  if self.exclude_file(rfilepath):
                    continue
                  filepath = joinpath(dirpath, filename)
                  if not isfilepath(filepath):
                    continue
                  # look up this file in our file state index
                  DFstate = filemap.get(rfilepath)
                  if (DFstate is not None and D is not None
                      and filename not in D):
                    # in filemap, but not in dir: start again
                    warning("in filemap but not in Dir, rescanning")
                    filemap.del_path(rfilepath)
                    DFstate = None
                  if DFstate is None:
                    DFstate = filemap.add_path(rfilepath)
                  try:
                    new_size = DFstate.stat_size(self.follow_symlinks)
                  except OSError as e:
                    if e.errno == errno.ENOENT:
                      warning("forgetting missing file")
                      self._del_datafilestate(DFstate)
                    else:
                      warning("stat: %s", e)
                    continue
                  if new_size is None:
                    # skip non files
                    debug("SKIP non-file")
                    continue
                  if meta_store:
                    try:
                      E = D[filename]
                    except KeyError:
                      E = FileDirent(filename)
                      D[filename] = E
                    else:
                      if not E.isfile:
                        info(
                            "new FileDirent replacing previous nonfile: %s",
                            E
                        )
                        E = FileDirent(filename)
                        D[filename] = E
                  if new_size > DFstate.scanned_to:
                    with proxy.extend_prefix(
                        " scan %s[%d:%d]" %
                        (filename, DFstate.scanned_to, new_size)):
                      if DFstate.scanned_to > 0:
                        info("scan from %d", DFstate.scanned_to)
                      if meta_store is not None:
                        blockQ = IterableQueue()
                        R = meta_store._defer(
                            lambda B, Q: top_block_for(spliced_blocks(B, Q)),
                            E.block, blockQ
                        )
                      scan_from = DFstate.scanned_to
                      scan_start = time()
                      scanner = DFstate.scanfrom(offset=DFstate.scanned_to)
                      if defaults.show_progress:
                        scanner = progressbar(
                            DFstate.scanfrom(offset=DFstate.scanned_to),
                            "scan " + rfilepath,
                            position=DFstate.scanned_to,
                            total=new_size,
                            units_scale=BINARY_BYTES_SCALE,
                            itemlenfunc=lambda t3: t3[2] - t3[0],
                            update_frequency=128,
                        )
                      for pre_offset, data, post_offset in scanner:
                        hashcode = self.hashclass.from_chunk(data)
                        entry = FileDataIndexEntry(
                            filenum=DFstate.filenum,
                            data_offset=pre_offset,
                            data_length=len(data),
                            flags=0,
                        )
                        entry_bs = bytes(entry)
                        with self._lock:
                          index[hashcode] = entry_bs
                        if meta_store is not None:
                          B = Block(data=data, hashcode=hashcode, added=True)
                          blockQ.put((pre_offset, B))
                        DFstate.scanned_to = post_offset
                        if self.cancelled or self.flag_scan_disable:
                          break
                      if meta_store is not None:
                        blockQ.close()
                        try:
                          top_block = R()
                        except MissingHashcodeError as e:
                          error("missing data, forcing rescan: %s", e)
                          DFstate.scanned_to = 0
                        else:
                          E.block = top_block
                          D.changed = True
                          updated = True
                      elapsed = time() - scan_start
                      scanned = DFstate.scanned_to - scan_from
                      if elapsed > 0:
                        scan_rate = scanned / elapsed
                      else:
                        scan_rate = None
                      if scan_rate is None:
                        info(
                            "scanned to %d: %s",
                            DFstate.scanned_to,
                            transcribe_bytes_geek(scanned)
                        )
                      else:
                        info(
                            "scanned to %d: %s at %s/s",
                            DFstate.scanned_to,
                            transcribe_bytes_geek(scanned),
                            transcribe_bytes_geek(scan_rate)
                        )
                      # stall after a file scan, briefly, to limit impact
                      if elapsed > 0:
                        sleep(min(elapsed, self.DELAY_INTRASCAN))
          # update the archive after updating from a directory
          if updated and meta_store is not None:
            self.sync_meta()
            updated = False
    self.flush()
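# Illustrative sketch of the (st_dev, st_ino) "seen" set used above to avoid
# rescanning directories reached twice via symlinks when walking with
# followlinks=True; walk_once is a hypothetical name for this example.
import os
from os.path import join as joinpath

def walk_once(topdirpath):
  ''' Like os.walk(..., followlinks=True), but pruning any directory whose
      (st_dev, st_ino) pair has already been visited.
  '''
  seen = set()
  for dirpath, dirnames, filenames in os.walk(topdirpath, followlinks=True):
    kept = []
    for dname in sorted(dirnames):
      subdirpath = joinpath(dirpath, dname)
      try:
        S = os.stat(subdirpath)
      except OSError:
        # inaccessible, skip
        continue
      ino = S.st_dev, S.st_ino
      if ino in seen:
        # already visited, probably via a symlink
        continue
      seen.add(ino)
      kept.append(dname)
    dirnames[:] = kept
    yield dirpath, dirnames, filenames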
def __init__(self, mobipath):
  if not isfilepath(mobipath):
    raise ValueError("mobipath %r is not a file" % (mobipath,))
  self.path = mobipath
def parse_special(self, special, readonly):
  ''' Parse the mount command's special device from `special`.
      Return `(fsname,readonly,Store,Dir,basename,archive)`.

      Supported formats:
      * `D{...}`: a raw `Dir` transcription.
      * `[`*clause*`]`: a config clause name.
      * `[`*clause*`]`*archive*: a config clause name
        and a reference to a named archive associated with that clause.
      * *archive_file*`.vt`: a path to a `.vt` archive file.
  '''
  fsname = special
  specialD = None
  special_store = None
  archive = None
  if special.startswith('D{') and special.endswith('}'):
    # D{dir}
    specialD, offset = parse(special)
    if offset != len(special):
      raise ValueError("unparsed text: %r" % (special[offset:],))
    if not isinstance(specialD, Dir):
      raise ValueError(
          "does not seem to be a Dir transcription, looks like a %s" %
          (type(specialD),)
      )
    special_basename = specialD.name
    if not readonly:
      warning("setting readonly")
      readonly = True
  elif special.startswith('['):
    if special.endswith(']'):
      # expect "[clause]"
      clause_name, offset = get_ini_clausename(special)
      archive_name = ''
      special_basename = clause_name
    else:
      # expect "[clause]archive"
      # TODO: just pass to Archive(special,config=self)?
      # what about special_basename then?
      clause_name, archive_name, offset = get_ini_clause_entryname(special)
      special_basename = archive_name
    if offset < len(special):
      raise ValueError("unparsed text: %r" % (special[offset:],))
    fsname = str(self) + special
    try:
      special_store = self[clause_name]
    except KeyError:
      raise ValueError("unknown config clause [%s]" % (clause_name,))
    if archive_name is None or not archive_name:
      special_basename = clause_name
    else:
      special_basename = archive_name
    archive = special_store.get_Archive(archive_name)
  else:
    # pathname to archive file
    arpath = special
    if not isfilepath(arpath):
      raise ValueError("not a file")
    fsname = shortpath(realpath(arpath))
    spfx, sext = splitext(basename(arpath))
    if spfx and sext == '.vt':
      special_basename = spfx
    else:
      special_basename = special
    archive = FilePathArchive(arpath)
  return fsname, readonly, special_store, specialD, special_basename, archive
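# Illustrative sketch of the dispatch above: classify a mount "special" string
# into one of its supported forms. classify_special is a hypothetical helper
# used only for this example; the real parse_special also resolves the Store,
# Dir and archive.
def classify_special(special):
  ''' Return 'dir-transcription', 'config-clause', 'config-clause-archive'
      or 'archive-file' for a mount special string.
  '''
  if special.startswith('D{') and special.endswith('}'):
    return 'dir-transcription'
  if special.startswith('['):
    if special.endswith(']'):
      return 'config-clause'
    return 'config-clause-archive'
  # otherwise a filesystem path to an archive file; in parse_special the
  # .vt extension only affects the derived basename
  return 'archive-file'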