def update(corpus, docix, path):
    """Reindex a document by index number."""
    # NOTE(review): the docstring is kept to its original one-line wording
    # because click presumably surfaces it as the command's help text --
    # confirm against the decorators, which are not visible here.
    #
    # corpus -- corpus name handed to ``Corpus`` and echoed on success.
    # docix  -- index number of the document; coerced with ``int``.
    # path   -- location of the source file, wrapped in ``Path``.
    doc_number = int(docix)

    # Opening the corpus and re-indexing run under the spinner.
    with click_spinner.spinner():
        target = Corpus(corpus)
        target.update(docix=doc_number, path=Path(path))

    # The update counts as successful only if the work is searchable.
    succeeded = target.work_by_docix(doc_number).searchable
    click.echo(
        f"[+] updated document {doc_number} in '{corpus}'"
        if succeeded
        else "[-] failed"
    )
def verify(corpus, verbose, dry_run):
    """Verify the integrity of corpus indexes and manifest."""
    # NOTE(review): the docstring is kept to its original one-line wording
    # because click presumably surfaces it as the command's help text --
    # confirm against the decorators, which are not visible here.
    #
    # corpus  -- corpus name handed to ``Corpus`` and echoed in the summary.
    # verbose -- when truthy, the per-document results are shown in a pager.
    # dry_run -- forwarded to ``Corpus.verify_by_docix``; when truthy, missing
    #            documents are reported but never re-indexed.
    c = Corpus(corpus)
    manifest = c.manifest

    # Nothing to verify without an index directory.
    if not c.index_dir.exists():
        return

    # Give the user a chance to back out; warn when the manifest is large.
    proceed = click.confirm(
        f"{len(manifest)} documents manifested for corpus '{c.name}'. "
        + ("This might take a while! " if len(manifest) > 30 else "")
        + "Proceed?",
        default=True,
    )
    if not proceed:
        return

    verified = []  # (docix, message) pairs, one per manifest entry
    passes = fixes = adds = orphans = 0
    missing = {}  # manifest key -> manifest entry whose index files are gone

    with click.progressbar(
        manifest,
        length=len(manifest),
        show_percent=True,
        label=f"Verifying '{c.name}'",
    ) as bar:
        for item in bar:
            (
                status,
                (docix, author, title, filename, info),
            ) = c.verify_by_docix(item, dry_run=dry_run)

            msg = f"[{docix}] {author}, {title} ({filename})"
            if status == 0:
                msg += ", passed!"
                passes += 1
            elif status == 1:
                msg += ", fixed in manifest"
                fixes += 1
            elif status == 2:
                msg += ", added to manifest"
                adds += 1
            elif status == 3:
                msg += ", deleted orphaned index files"
                orphans += 1
            elif status == 4:
                # For missing indexes the metadata is taken from the manifest
                # entry rather than from the verification result.
                entry = manifest[item]
                msg = (
                    f"[{docix}] {entry['author']}, {entry['title']} ("
                    f"{entry['filename']})"
                )
                msg += ", missing index files!"
                missing[item] = entry

            if info is not None and cylleneus.settings.DEBUG_LEVEL:
                msg += f" (= {info})"
            verified.append((docix, msg))

    if verbose and verified:
        # A leading "*" marks every line produced by a dry run.
        marker = "*" if dry_run else ""
        click.echo_via_pager(
            "\n".join(
                marker + text
                for _, text in sorted(verified, key=lambda pair: pair[0])
            )
        )

    # Build the one-line summary; each optional fragment carries only its own
    # leading ", " so no doubled or dangling separators can appear.
    summary = f"[-] '{corpus}': {len(manifest)} checked, {passes} passed"
    if fixes:
        summary += f", {fixes} fixed in manifest"
    if adds:
        summary += f", {adds} added to manifest"
    if orphans:
        summary += f", {orphans} orphaned files deleted"
    if dry_run and passes < len(manifest):
        summary += " -- changes have NOT been committed!"
    click.echo(summary)

    if not missing:
        return
    if not click.confirm(
        f"Try to re-index {len(missing)} missing documents?", default=True,
    ):
        return

    for docix, meta in missing.items():
        if not meta["filename"]:
            # No source file recorded for this entry, so there is nothing
            # to re-index from.
            continue

        described = f"{meta['author']}, {meta['title']} ({meta['filename']})"

        if dry_run:
            # A dry run never touches the index; just report it.
            click.echo(f"*[-] {described} -- document NOT re-indexed!")
            continue

        path = c.text_dir / Path(meta["filename"])
        with click_spinner.spinner():
            updated_docix = c.update(docix, path)

        if updated_docix is not None:
            click.echo(f"[{updated_docix}] {described}, index created!")
        else:
            click.echo(f"[-] {described}, failed")