def make_cbz(self, cbzpath=None):
  ''' Create a CBZ file from the images in the MOBI file.
      Return the path to the created CBZ file.
  '''
  if cbzpath is None:
    mobibase, mobiext = splitext(basename(self.path))
    cbzpath = mobibase + '.cbz'
  if existspath(cbzpath):
    raise ValueError("CBZ path %r already exists" % (cbzpath,))
  with self.extracted() as df:
    dirpath, rfilepath = df
    imagepaths = sorted(glob(joinpath(dirpath, 'mobi8/OEBPS/Images/*.*')))
    info("write %s", cbzpath)
    try:
      with pfx_call(ZipFile, cbzpath, 'x', compression=ZIP_STORED) as cbz:
        for imagepath in imagepaths:
          pfx_call(cbz.write, imagepath, arcname=basename(imagepath))
    except FileExistsError as e:
      error("CBZ already exists: %r: %s", cbzpath, e)
      return 1
    except Exception:
      if existspath(cbzpath):
        pfx_call(os.unlink, cbzpath)
      raise
  return cbzpath

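# A hedged usage sketch for make_cbz() (assumes a Mobi instance for a local
# MOBI/AZW3 file; "book.azw3" is an illustrative path):
#
#   MB = Mobi('book.azw3')
#   cbzpath = MB.make_cbz()  # defaults to book.cbz in the current directory
#   print("comic written to", cbzpath)
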
def phl_xml(self):
  ''' Decode the `.phl` XML file if present and return an XML `ElementTree`.
      Return `None` if the file is not present.

      This file seems to contain popular highlights in the
      `popular/content/annotation` tags.
  '''
  phl_path = self.subpath(self.subdir_name + '.phl')
  try:
    with pfx_call(open, phl_path, 'rb') as f:
      xml_bs = f.read()
  except FileNotFoundError:
    return None
  with Pfx(phl_path):
    return pfx_call(etree.fromstring, xml_bs)

def startup_shutdown(self):
  ''' Default startup/shutdown context manager.

      This base method operates a lockfile to manage concurrent
      access by other programmes (which would also need to honour
      this file). If you actually expect this to be common you
      should try to keep the `ORM` "open" as briefly as possible.

      The lock file is only operated if `self.db_fspath` is set,
      currently only for filesystem SQLite database URLs.
  '''
  if self.db_fspath:
    self._lockfilepath = makelockfile(self.db_fspath, poll_interval=0.2)
  yield
  if self._lockfilepath is not None:
    pfx_call(os.remove, self._lockfilepath)
    self._lockfilepath = None

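# A hedged usage sketch: startup_shutdown() typically backs the ORM's
# context manager protocol (e.g. via cs.resources.MultiOpenMixin), so the
# lock file exists only while the ORM is "open":
#
#   with orm:   # lock file created here for filesystem SQLite URLs
#     ...       # brief database work
#               # lock file removed on exit
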
def cmd_resize(argv):
  ''' Usage: {cmd} vdipath new_size_mb
        Resize a .vdi file to new_size_mb, a size in megabytes.
  '''
  if not argv:
    raise GetoptError("missing vdipath")
  vdipath = argv.pop(0)
  with Pfx("vdipath %r", vdipath):
    if not vdipath.endswith('.vdi'):
      raise GetoptError("does not end with .vdi")
    if not existspath(vdipath):
      raise GetoptError("does not exist")
  if not argv:
    raise GetoptError("missing new_size_mb")
  new_size_mb_s = argv.pop(0)
  with Pfx("new_size_mb %r", new_size_mb_s):
    try:
      new_size_mb = int(new_size_mb_s)
    except ValueError as e:
      raise GetoptError("not an integer: %s" % (e,))
    else:
      if new_size_mb <= 0:
        raise GetoptError("must be >0")
  try:
    return pfx_call(resizevdi, vdipath, new_size_mb, trace=True)
  except ValueError as e:
    error("resize fails: %s", e)
    return 1

def cmd_ls(self, argv):
  ''' Usage: {cmd} [-l]
        List the contents of the Calibre library.
  '''
  long = False
  if argv and argv[0] == '-l':
    long = True
    argv.pop(0)
  if argv:
    raise GetoptError("extra arguments: %r" % (argv,))
  options = self.options
  calibre = options.calibre
  for book in calibre:
    with Pfx("%d:%s", book.id, book.title):
      print(f"{book.title} ({book.dbid})")
      if long:
        print(" ", book.path)
        identifiers = book.identifiers_as_dict()
        if identifiers:
          print(" ", TagSet(identifiers))
        for fmt, subpath in book.formats_as_dict().items():
          with Pfx(fmt):
            fspath = calibre.pathto(subpath)
            size = pfx_call(os.stat, fspath).st_size
            print(" ", fmt, transcribe_bytes_geek(size), subpath)

def _run(self, *calargv, subp_options=None):
  ''' Run a Calibre utility command.

      Parameters:
      * `calargv`: an iterable of the calibre command to issue;
        if the command name is not an absolute path
        it is expected to come from `self.CALIBRE_BINDIR_DEFAULT`
      * `subp_options`: optional mapping of keyword arguments
        to pass to `subprocess.run`
  '''
  X("calargv=%r", calargv)
  if subp_options is None:
    subp_options = {}
  subp_options.setdefault('check', True)
  cmd, *calargv = calargv
  if not isabspath(cmd):
    cmd = joinpath(self.CALIBRE_BINDIR_DEFAULT, cmd)
  print("RUN", cmd, *calargv)
  try:
    cp = pfx_call(run, [cmd, *calargv], **subp_options)
  except CalledProcessError as cpe:
    error(
        "run fails, exit code %s:\n %s",
        cpe.returncode,
        ' '.join(map(repr, cpe.cmd)),
    )
    if cpe.stderr:
      print(cpe.stderr.replace('\n', ' \n'), file=sys.stderr)
    raise
  return cp

def extract(self, dirpath=None):
  ''' Extract the contents of the MOBI file into a directory.
      Return `(dirpath,rfilepath)` where `dirpath` is the extracted
      file tree and `rfilepath` is the relative pathname of the
      primary epub, html or pdf file depending on the mobi type.
  '''
  if dirpath is not None and existspath(dirpath):
    raise ValueError("dirpath %r already exists" % (dirpath,))
  # divert stdout because the mobi library sends some warnings etc to stdout
  with stackattrs(sys, stdout=sys.stderr):
    tmpdirpath, filepath = pfx_call(mobi.extract, self.path)
  rfilepath = relpath(filepath, tmpdirpath)
  if dirpath is None:
    dirpath = tmpdirpath
  else:
    pfx_call(os.rename, tmpdirpath, dirpath)
  return dirpath, rfilepath

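# A hedged usage sketch for extract() (assumes a Mobi instance MB; the
# target directory name is illustrative):
#
#   dirpath, rfilepath = MB.extract('book.d')
#   primary = joinpath(dirpath, rfilepath)  # the primary epub/html/pdf file
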
def _parse_value(value_s):
  ''' Parse `value_s` via `Tag.parse_value`,
      falling back to the raw string on failure.
  '''
  try:
    value, offset = pfx_call(Tag.parse_value, value_s)
  except ValueError as e:
    warning("EditValue._parse_value(%s): %s", r(value_s), e)
    value = value_s
  else:
    if offset < len(value_s):
      warning("unparsed: %r", value_s[offset:])
      if isinstance(value, str):
        value += value_s[offset:]
      else:
        value = value_s
  return value

def _find_library_dbpath(self):
  ''' Look up the path of the SQLite database.
  '''
  dbpaths = pfx_call(glob, joinpath(self.tree.fspath, self.DB_GLOB))
  if not dbpaths:
    raise ValueError("no matching library file")
  if len(dbpaths) > 1:
    dbpaths = sorted(dbpaths)
    warning(" \n".join(["multiple matches, choosing the latest:", *dbpaths]))
    dbpath = dbpaths[-1]
  else:
    dbpath, = dbpaths
  return dbpath

def make_cbz(self, replace_format=False):
  ''' Create a CBZ format from the AZW3 Mobi format.
  '''
  from .mobi import Mobi  # pylint: disable=import-outside-toplevel
  calibre = self.tree
  formats = self.formats_as_dict()
  if 'CBZ' in formats and not replace_format:
    warning("format CBZ already present, not adding")
  else:
    mobi_subpath = self.mobi_subpath
    if mobi_subpath:
      mobipath = calibre.pathto(mobi_subpath)
      base, _ = splitext(basename(mobipath))
      MB = Mobi(mobipath)
      with TemporaryDirectory() as tmpdirpath:
        cbzpath = joinpath(tmpdirpath, base + '.cbz')
        pfx_call(MB.make_cbz, cbzpath)
        calibre.add_format(cbzpath, self.dbid, force=replace_format)
    else:
      raise ValueError(
          "no AZW3, AZW or MOBI format from which to construct a CBZ"
      )

def get_value(w):
  ''' Obtain the new value from the widget contents.
  '''
  edited = w.get(1.0, tk.END).rstrip('\n')
  try:
    new_value, offset = pfx_call(Tag.parse_value, edited)
  except ValueError as e:
    warning("toggle_editmode: %s", e)
    new_value = edited
  else:
    if offset < len(edited):
      warning("unparsed: %r", edited[offset:])
      if isinstance(new_value, str):
        new_value += edited[offset:]
      else:
        new_value = edited
  return new_value

def get(self):
  ''' Return the value derived from the contents via `Tag.parse_value`.
      An attempt is made to cope with unparsed values.
  '''
  s = super().get()
  try:
    value, offset = pfx_call(Tag.parse_value, s)
  except ValueError as e:
    warning(str(e))
    value = s
  else:
    if offset < len(s):
      warning("unparsed: %r", s[offset:])
      if isinstance(value, str):
        value += s[offset:]
      else:
        value = s
  return value

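# A hedged illustration of the unparsed-tail recovery shared by
# _parse_value(), get_value() and get() above (assumes Tag.parse_value's
# usual behaviour of stopping at the first unparseable character):
#
#   Tag.parse_value('"abc" xyz')  # -> ('abc', 5); the str tail ' xyz' is
#                                 #    appended, giving 'abc xyz'
#   Tag.parse_value('9 lives')    # -> (9, 1); 9 is not a str, so the raw
#                                 #    string '9 lives' is kept instead
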
def parse(self, s, offset=0):
  ''' Parse an object from the string `s` starting at `offset`.
      Return the object and the new offset.

      Parameters:
      * `s`: the source string
      * `offset`: optional string offset, default 0
  '''
  # strings
  value, offset2 = self.parse_qs(s, offset, optional=True)
  if value is not None:
    return value, offset2
  # decimal values
  if s[offset:offset + 1].isdigit():
    return get_decimal_or_float_value(s, offset)
  # {json}
  if s.startswith('{', offset):
    sub = s[offset:]
    m, suboffset = pfx_call(json.JSONDecoder().raw_decode, sub)
    offset += suboffset
    return m, offset
  # prefix{....}
  prefix, offset = get_identifier(s, offset)
  if not prefix:
    raise ValueError("no type prefix at offset %d" % (offset,))
  with Pfx("prefix %r", prefix):
    if offset >= len(s) or s[offset] != '{':
      raise ValueError("missing opening '{' at offset %d" % (offset,))
    offset += 1
    baseclass = self.prefix_map.get(prefix)
    if baseclass is None:
      raise ValueError("prefix not registered: %r" % (prefix,))
    with Pfx("baseclass=%s", baseclass.__name__):
      o, offset = baseclass.parse_inner(self, s, offset, '}', prefix)
    if offset > len(s):
      raise ValueError("parse_inner returns offset beyond text")
    if offset >= len(s) or s[offset] != '}':
      raise ValueError("missing closing '}' at offset %d" % (offset,))
    offset += 1
    return o, offset

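# Hedged examples of the syntaxes parse() accepts, one per branch above
# (assumes an instance `parser` with a hypothetical registered prefix "re"):
#
#   parser.parse('"quoted"')   # -> ('quoted', 8) via parse_qs
#   parser.parse('42')         # -> (42, 2) via get_decimal_or_float_value
#   parser.parse('{"a": 1}')   # -> ({'a': 1}, 8) via json.JSONDecoder
#   parser.parse('re{...}')    # dispatched to prefix_map['re'].parse_inner
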
def auto_name(self, srcpath, dstdirpath, tags):
  ''' Generate a filename computed from `srcpath`, `dstdirpath`
      and `tags`.
  '''
  tagged = self.fstags[dstdirpath]
  formats = self.conf_tag(tagged.merged_tags(), 'auto_name', ())
  if isinstance(formats, str):
    formats = [formats]
  if formats:
    if not isinstance(tags, TagSet):
      # promote a plain iterable of Tags to a TagSet
      new_tags = TagSet()
      for tag in tags:
        new_tags.add(tag)
      tags = new_tags
    for fmt in formats:
      with Pfx(repr(fmt)):
        try:
          formatted = pfx_call(tags.format_as, fmt, strict=True)
          if formatted.endswith('/'):
            formatted += basename(srcpath)
          return formatted
        except FormatAsError:
          ##warning("%s", e)
          ##print("auto_name(%r): %r: %s", srcpath, fmt, e)
          continue
  return basename(srcpath)

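# A hedged configuration sketch for auto_name() (the conf tag name and
# format string are illustrative, assuming the conventional "tagger." conf
# prefix): tag the destination directory like
#
#   tagger.auto_name=["{series_title}--s{season:02d}e{episode:02d}.mkv"]
#
# and files whose tags can fill the format are named accordingly; if no
# format applies, the basename of srcpath is kept.
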
def file_by_tags(
    self, path: str, prune_inherited=False, no_link=False, do_remove=False
):
  ''' Examine a file's tags.
      Where those tags imply a location, link the file to that location.
      Return the list of links made.

      Parameters:
      * `path`: the source path to the file
      * `prune_inherited`: optional, default `False`:
        prune the inherited tags from the direct tags on the target
      * `no_link`: optional, default `False`;
        do not actually make the hard link, just report the target
      * `do_remove`: optional, default `False`;
        remove source files if successfully linked

      Note: if `path` is already linked to an implied location
      that location is also included in the returned list.

      The filing process is as follows:
      - for each target directory, initially `dirname(path)`,
        look for a filing map on the tag `file_by_mapping`
      - for each directory in that mapping which matches a tag from `path`,
        queue it as an additional target directory
      - if there were no matching directories,
        file `path` at the current target directory
        under the filename returned by
        `{TAGGER_TAG_PREFIX_DEFAULT}.auto_name`
  '''
  if do_remove and no_link:
    raise ValueError("do_remove and no_link may not both be true")
  fstags = self.fstags
  # start the queue with the resolved `path`
  tagged = fstags[path]
  srcpath = tagged.filepath
  tags = tagged.all_tags
  # a queue of reference directories
  q = ListQueue((dirname(srcpath),))
  linked_to = []
  seen = set()
  for refdirpath in unrepeated(q, signature=abspath, seen=seen):
    with Pfx(refdirpath):
      # places to redirect this file
      mapping = self.file_by_mapping(refdirpath)
      interesting_tag_names = {tag.name for tag in mapping.keys()}
      # locate specific filing locations in the refdirpath
      refile_to = set()
      for tag_name in sorted(interesting_tag_names):
        with Pfx("tag_name %r", tag_name):
          if tag_name not in tags:
            continue
          bare_tag = Tag(tag_name, tags[tag_name])
          try:
            target_dirs = mapping.get(bare_tag, ())
          except TypeError as e:
            warning(" %s not mapped (%s), skipping", bare_tag, e)
            continue
          if not target_dirs:
            continue
          # collect other filing locations
          refile_to.update(target_dirs)
      # queue further locations if they are new
      if refile_to:
        new_refile_to = set(map(abspath, refile_to)) - seen
        if new_refile_to:
          q.extend(new_refile_to)
          continue
      # file locally (no new locations)
      dstbase = self.auto_name(srcpath, refdirpath, tags)
      with Pfx("%s => %s", refdirpath, dstbase):
        dstpath = dstbase if isabspath(dstbase) else joinpath(
            refdirpath, dstbase
        )
        if existspath(dstpath):
          if not samefile(srcpath, dstpath):
            warning("already exists, skipping")
          else:
            # already linked to this implied location,
            # include it in the returned list per the note above
            linked_to.append(dstpath)
          continue
        if no_link:
          linked_to.append(dstpath)
        else:
          linkto_dirpath = dirname(dstpath)
          if not isdirpath(linkto_dirpath):
            pfx_call(os.mkdir, linkto_dirpath)
          try:
            pfx_call(os.link, srcpath, dstpath)
          except OSError as e:
            warning("cannot link to %r: %s", dstpath, e)
          else:
            linked_to.append(dstpath)
            fstags[dstpath].update(tags)
            if prune_inherited:
              fstags[dstpath].prune_inherited()
  if linked_to and do_remove:
    S = os.stat(srcpath)
    if S.st_nlink < 2:
      warning(
          "not removing %r, insufficient hard links (%s)", srcpath, S.st_nlink
      )
    else:
      pfx_call(os.remove, srcpath)
  return linked_to

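# A hedged usage sketch for file_by_tags() (assumes a Tagger instance
# `tagger` and a tagged source file; the paths are illustrative):
#
#   linked = tagger.file_by_tags('incoming/episode.mkv', do_remove=True)
#   for dstpath in linked:
#     print('filed at', dstpath)
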
def cmd_log(self, argv):
  ''' Usage: {cmd} [{{CATEGORIES:|tag=value}}...] headline
        Options:
        -c categories Alternate categories specification.
        -d datetime   Timestamp for the log entry instead of "now".
  '''
  options = self.options
  sqltags = options.sqltags
  badopts = False
  opts, argv = getopt(argv, 'c:d:')
  for opt, val in opts:
    with Pfx(opt if val is None else "%s %r" % (opt, val)):
      if opt == '-c':
        self.options.categories.update(self.cats_from_str(val))
      elif opt == '-d':
        try:
          dt = pfx_call(datetime.fromisoformat, val)
        except ValueError as e:
          # pylint: disable=raise-missing-from
          raise GetoptError("unparsed date: %s" % (e,))
        else:
          if dt.tzinfo is None:
            # create a nonnaive datetime in the local zone
            dt = dt.astimezone()
          self.options.when = datetime2unixtime(dt)
      else:
        raise RuntimeError("unimplemented option")
  # Gather leading CAT: and tag= arguments
  while argv:
    arg0 = argv[0]
    with Pfx(repr(arg0)):
      # CATS,...:
      m = self.CATS_RE.match(arg0)
      if m:
        argv.pop(0)
        options.categories.update(
            cat.lower() for cat in m.group(0)[:-1].split(',') if cat
        )
        continue
      # tag_name=...
      tag_name, offset = get_dotted_identifier(arg0)
      if tag_name and offset < len(arg0) and arg0[offset] == '=':
        argv.pop(0)
        try:
          tag = Tag.from_str(arg0)
        except ValueError as e:
          debug("invalid tag: %s", e)
          options.tags.add(Tag(tag_name, arg0[offset:]))
        else:
          options.tags.add(tag)
        continue
      break
  if badopts:
    raise GetoptError("invalid preargv")
  if not argv:
    raise GetoptError("no headline")
  if not options.categories:
    options.categories.update(
        self.cats_from_str(options.fstags['.'].all_tags.get('cs.dlog', ''))
    )
  headline = ' '.join(argv)
  dlog(
      headline,
      logpath=options.logpath,
      sqltags=sqltags,
      tags=options.tags,
      categories=options.categories,
      when=options.when,
  )

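# A hedged command line example for the usage above (assumes the command is
# installed as "dlog"; the categories, tag and headline are illustrative):
#
#   dlog log -d 2024-03-01T09:30 BOOKS,READING: 'title="Vacuum Flowers"' finished
#
# which logs the headline "finished" under categories "books" and "reading"
# with a title tag and the supplied timestamp.
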
def dlog(
    headline: str,
    *,
    logpath: Optional[str] = None,
    sqltags: Optional[SQLTags] = None,
    tags=None,
    categories: Optional[Iterable] = None,
    when: Union[None, int, float, datetime] = None,
):
  ''' Log `headline` to the dlog.

      Parameters:
      * `headline`: the log line message
      * `logpath`: optional text log pathname, default `{DEFAULT_LOGPATH}`
      * `sqltags`: optional `SQLTags` instance,
        default uses `{DEFAULT_DBPATH}`
      * `tags`: optional iterable of `Tag`s to associate with the log entry
      * `categories`: optional iterable of category strings
      * `when`: optional UNIX time or `datetime`, default now
  '''
  if sqltags is None:
    # pylint: disable=redefined-argument-from-local
    with SQLTags(expanduser(DEFAULT_DBPATH)) as sqltags:
      return dlog(
          headline,
          logpath=logpath,
          sqltags=sqltags,
          tags=tags,
          categories=categories,
          when=when,
      )
  if logpath is None:
    logpath = expanduser(DEFAULT_LOGPATH)
  logtags = TagSet()
  if tags:
    for tag in tags:
      logtags.add(tag)
  categories = sorted(
      () if categories is None else set(map(str.lower, categories))
  )
  if when is None:
    when = time.time()
  elif isinstance(when, (int, float)):
    pass
  elif isinstance(when, datetime):
    dt = when
    if dt.tzinfo is None:
      # create a nonnaive datetime in the local zone
      dt = dt.astimezone()
    when = datetime2unixtime(dt)
  else:
    raise TypeError("when=%s:%r: unhandled type" % (type(when).__name__, when))
  tt = time.localtime(when)
  print_args = [
      '{:4d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format(
          tt.tm_year,
          tt.tm_mon,
          tt.tm_mday,
          tt.tm_hour,
          tt.tm_min,
          tt.tm_sec,
      )
  ]
  if categories:
    print_args.append(','.join(categories).upper() + ':')
  print_args.append(headline)
  if logtags:
    print_args.append('[' + ' '.join(map(str, logtags)) + ']')
  with pfx_call(open, logpath, 'a') as logf:
    print(*print_args, file=logf)
  # add the headline and categories to the tags
  logtags.add('headline', headline)
  if categories:
    logtags.add('categories', sorted(categories))
  sqltags.default_factory(None, unixtime=when, tags=logtags)

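# A hedged usage sketch for dlog() (the headline, category and tag are
# illustrative; with sqltags=None a default SQLTags is opened as above):
#
#   from cs.tagset import Tag
#   dlog(
#       'finished rereading',
#       categories=['books'],
#       tags=[Tag('title', 'Vacuum Flowers')],
#   )
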
def plex_linkpath(
    fstags, filepath, plex_topdirpath, do_hardlink=False, print=None
):
  ''' Link `filepath` into `plex_topdirpath`.

      Parameters:
      * `fstags`: the `FSTags` instance
      * `filepath`: filesystem pathname of file to link into Plex tree
      * `plex_topdirpath`: filesystem pathname of the Plex tree
      * `do_hardlink`: use a hard link if true, otherwise a softlink;
        default `False`
      * `print`: print function for the link action,
        default from `builtins.print`
  '''
  if print is None:
    print = builtins.print
  tagged_path = fstags[filepath]
  subpath = plex_subpath(tagged_path)
  plexpath = joinpath(plex_topdirpath, subpath)
  plexdirpath = dirname(plexpath)
  if do_hardlink:
    if existspath(plexpath):
      if samefile(filepath, plexpath):
        return
      pfx_call(os.unlink, plexpath)
    print(subpath, "<=", basename(filepath))
    if not isdirpath(plexdirpath):
      pfx_call(os.makedirs, plexdirpath)
    pfx_call(os.link, filepath, plexpath)
  else:
    rfilepath = relpath(filepath, plexdirpath)
    if existspath(plexpath):
      try:
        sympath = os.readlink(plexpath)
      except OSError as e:
        warning("readlink(%r): %s", plexpath, e)
      else:
        if rfilepath == sympath:
          return
      pfx_call(os.unlink, plexpath)
    print(subpath, "<=", basename(filepath))
    if not isdirpath(plexdirpath):
      pfx_call(os.makedirs, plexdirpath)
    pfx_call(os.symlink, rfilepath, plexpath)

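# A hedged usage sketch for plex_linkpath() (assumes an open FSTags
# instance; the paths are illustrative):
#
#   with FSTags() as fstags:
#     plex_linkpath(fstags, 'media/show.s01e02.mkv', '/plex/library')
#
# By default this places a relative symlink at the Plex-style subpath from
# plex_subpath(); pass do_hardlink=True to hard link instead.
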
def pngfor(path, max_size=None, *, min_size=None, cached=None, force=False):
  ''' Create a PNG version of the image at `path`,
      scaled to fit within some size constraints.
      Return the pathname of the PNG file.

      Parameters:
      * `max_size`: optional `(width,height)` tuple, default `(1920,1080)`
      * `min_size`: optional `(width,height)` tuple, default half of `max_size`
      * `cached`: optional mapping of `(path,'png',size)`->`pngof_path`
        where `size` is the chosen final size tuple
      * `force`: optional flag (default `False`) to force recreation
        of the PNG version and associated cache entry
  '''
  if max_size is None:
    max_size = 1920, 1080
  if min_size is None:
    min_size = max_size[0] // 2, max_size[1] // 2
  if cached is None:
    cached = _conv_cache
  tagged = _fstags[path]
  path = tagged.filepath
  size = image_size(path)
  if size is None:
    return None
  # choose a target size
  if size[0] > max_size[0] or size[1] > max_size[1]:
    scale = min(max_size[0] / size[0], max_size[1] / size[1])
    re_size = int(size[0] * scale), int(size[1] * scale)
    ##warning("too big, rescale by %s from %r to %r", scale, size, re_size)
    key = path, 'png', re_size
  elif size[0] < min_size[0] or size[1] < min_size[1]:
    scale = min(min_size[0] / size[0], min_size[1] / size[1])
    re_size = int(size[0] * scale), int(size[1] * scale)
    ##warning("too small, rescale by %s from %r to %r", scale, size, re_size)
    key = path, 'png', re_size
  else:
    re_size = None
    key = path, 'png', size
  cached_path = cached.get(key)
  if cached_path:
    return cached_path
  if tagged['pil.format'] == 'PNG' and re_size is None:
    # right format, same size - return ourself
    cached[key] = tagged.filepath
    return tagged.filepath
  # path to converted file
  hashcode = SHA256.from_pathname(path)
  pngbase = f'{hashcode}.png'
  if not isdirpath(CONVCACHE_ROOT):
    pfx_call(os.mkdir, CONVCACHE_ROOT)
  convsize = re_size or size
  convdirpath = joinpath(CONVCACHE_ROOT, f'png/{convsize[0]}x{convsize[1]}')
  if not isdirpath(convdirpath):
    pfx_call(os.makedirs, convdirpath)
  pngpath = joinpath(convdirpath, pngbase)
  if force or not isfilepath(pngpath):
    try:
      with Image.open(path) as im:
        if re_size is None:
          pfx_call(im.save, pngpath, 'PNG')
        else:
          im2 = im.resize(re_size)
          pfx_call(im2.save, pngpath, 'PNG')
    except UnidentifiedImageError as e:
      warning("unhandled image: %s", e)
      pngpath = None
  cached[key] = pngpath
  return pngpath

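# A hedged usage sketch for pngfor() (the source path is illustrative; the
# result lands under CONVCACHE_ROOT/png/WIDTHxHEIGHT/):
#
#   pngpath = pngfor('photos/holiday.tiff')
#   if pngpath is not None:
#     print('PNG version at', pngpath)
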
def download(self, download_id: int, filename=None):
  ''' Download the file with `download_id` to `filename`.
      Return the `TagSet` for the recording.

      The default `filename` is the basename of the filename
      from the download URL.
      If the filename is supplied with a trailing dot (`'.'`)
      then the file extension will be taken from the filename
      of the download URL.
  '''
  dl_data = self.suburl_data(f'library/{download_id}/download')
  dl_url = dl_data['url']
  dl_basename = unpercent(basename(dl_url))
  if filename is None:
    filename = dl_basename
  elif filename.endswith('.'):
    _, dl_ext = splitext(dl_basename)
    filename = filename[:-1] + dl_ext
  if pathexists(filename):
    warning("SKIPPING download of %r: already exists, just tagging", filename)
    dl_rsp = None
  else:
    dl_cookies = dl_data['data']
    jar = requests.cookies.RequestsCookieJar()
    for ck_name in ('CloudFront-Expires', 'CloudFront-Key-Pair-Id',
                    'CloudFront-Signature'):
      jar.set(
          ck_name,
          str(dl_cookies[ck_name]),
          domain='playonrecorder.com',
          secure=True,
      )
    dl_rsp = requests.get(
        dl_url, auth=_RequestsNoAuth(), cookies=jar, stream=True
    )
    dl_length = int(dl_rsp.headers['Content-Length'])
    with pfx_call(atomic_filename, filename, mode='wb',
                  placeholder=True) as f:
      for chunk in progressbar(
          dl_rsp.iter_content(chunk_size=131072),
          label=filename,
          total=dl_length,
          units_scale=BINARY_BYTES_SCALE,
          itemlenfunc=len,
          report_print=True,
      ):
        offset = 0
        length = len(chunk)
        while offset < length:
          with Pfx("write %d bytes", length - offset):
            written = f.write(chunk[offset:])
            if written < 1:
              warning("fewer than 1 byte written: %s", written)
            else:
              offset += written
              assert offset <= length
        assert offset == length
  fullpath = realpath(filename)
  recording = self[download_id]
  if dl_rsp is not None:
    recording.set('download_path', fullpath)
  # apply the SQLTagSet to the FSTags TagSet
  self._fstags[fullpath].update(recording.subtags('playon'), prefix='playon')
  return recording

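# A hedged usage sketch for download() (assumes a PlayOn client instance
# `api`; the download id and filename are illustrative):
#
#   recording = api.download(123456, filename='show-s01e02.')
#   # the trailing "." takes the extension from the download URL's filename
#   print(recording['download_path'])
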