def _duplicate_documents_error(first_seen, duplicate):
    """Build the coloured error text for two documents with the same checksum.

    ``first_seen`` is the ``(pk, file_name)`` tuple recorded for the document
    that produced the checksum first; ``duplicate`` is the document wrapper
    that produced it again. The suggested ``DELETE`` statement targets the
    duplicate's primary key.
    """
    first_pk, first_name = first_seen
    template = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}"
    return template.format(
        p1=colourise(
            "It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:",
            fg="yellow"),
        p2=colourise(
            "To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
            fg="yellow"),
        p3=colourise(
            "When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
            fg="yellow"),
        doc1=colourise(
            " * {} (id: {})".format(first_name, first_pk), fg="red"),
        doc2=colourise(
            " * {} (id: {})".format(duplicate.file_name, duplicate.pk),
            fg="red"),
        code=colourise(
            " $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell"
            .format(pk=duplicate.pk),
            fg="green"),
        line=colourise(
            "\n{}\n".format("=" * 80), fg="white", opts=("bold", )),
    )


def set_checksums(apps, schema_editor):
    """Compute and store an MD5 checksum for every existing document.

    Each row of the historical ``documents.Document`` model is wrapped in
    ``Document``, its source file is decrypted with ``GnuPG.decrypted`` and
    hashed, and the hex digest is written to the row's ``checksum`` column.

    Returns immediately, without printing anything, when there are no
    documents.

    Raises:
        RuntimeError: if two documents produce the same checksum. The message
            names both documents and a command for deleting the second one.

    NOTE(review): another ``set_checksums`` is defined later in this module
    and would replace this one at import time -- confirm which is intended.
    """
    document_model = apps.get_model("documents", "Document")

    if not document_model.objects.all().exists():
        return

    banner = "".join((
        "\n\n",
        " This is a one-time only migration to generate checksums for all\n",
        " of your existing documents. If you have a lot of documents\n",
        " though, this may take a while, so a coffee break may be in\n",
        " order.",
        "\n",
    ))
    print(colourise(banner, opts=("bold", )))

    # checksum -> (pk, file_name) of the first document that produced it
    seen = {}

    for record in document_model.objects.all():
        document = Document(record)

        progress = (
            colourise("*", fg="green"),
            colourise("Generating a checksum for", fg="white"),
            colourise(document.file_name, fg="cyan"),
        )
        print(" {} {} {}".format(*progress))

        with document.source_file as encrypted:
            checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()

        if checksum in seen:
            raise RuntimeError(
                _duplicate_documents_error(seen[checksum], document))

        seen[checksum] = (document.pk, document.file_name)

        # Update through the queryset so only the checksum column is written.
        document_model.objects.filter(pk=document.pk).update(
            checksum=checksum)
def move_documents_and_create_thumbnails(apps, schema_editor):
    """Move encrypted documents into ``originals/`` and create thumbnails.

    For every entry in ``MEDIA_ROOT/documents`` whose name ends in ``gpg``:

    1. decrypt it into a scratch directory under ``settings.SCRATCH_DIR``;
    2. run ``settings.CONVERT_BINARY`` on the decrypted file to render PNGs;
    3. encrypt the first rendered image (``convert-0000.png``) into
       ``MEDIA_ROOT/documents/thumbnails``;
    4. move the encrypted original into ``MEDIA_ROOT/documents/originals``.

    Returns immediately when the documents directory contains exactly the
    ``originals`` and ``thumbnails`` entries, i.e. there is nothing left to
    move.

    ``apps`` and ``schema_editor`` are not used.
    """
    documents_dir = os.path.join(settings.MEDIA_ROOT, "documents")
    originals_dir = os.path.join(documents_dir, "originals")
    thumbnails_dir = os.path.join(documents_dir, "thumbnails")

    documents = os.listdir(documents_dir)

    if set(documents) == {"originals", "thumbnails"}:
        return

    print(colourise(
        "\n\n"
        " This is a one-time only migration to generate thumbnails for all of your\n"
        " documents so that future UIs will have something to work with. If you have\n"
        " a lot of documents though, this may take a while, so a coffee break may be\n"
        " in order."
        "\n", opts=("bold",)
    ))

    # Thumbnails are written to, and originals moved into, these directories
    # below; make sure they exist instead of failing on the first document.
    os.makedirs(originals_dir, exist_ok=True)
    os.makedirs(thumbnails_dir, exist_ok=True)

    scratch = {"prefix": "paperless", "dir": settings.SCRATCH_DIR}

    for f in sorted(documents):

        if not f.endswith("gpg"):
            continue

        print(" {} {} {}".format(
            colourise("*", fg="green"),
            colourise("Generating a thumbnail for", fg="white"),
            colourise(f, fg="cyan")
        ))

        orig_source = os.path.join(documents_dir, f)
        thumb_target = os.path.join(
            thumbnails_dir,
            re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
        )

        # The scratch directories hold the decrypted document and the
        # rendered images. Using context managers guarantees they are removed
        # even if decryption, conversion or encryption raises, so plaintext
        # is never left behind in SCRATCH_DIR.
        with tempfile.TemporaryDirectory(**scratch) as thumb_temp, \
                tempfile.TemporaryDirectory(**scratch) as orig_temp:

            orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))

            with open(orig_source, "rb") as encrypted, \
                    open(orig_target, "wb") as unencrypted:
                unencrypted.write(GnuPG.decrypted(encrypted))

            # The exit status is not inspected; a failed conversion shows up
            # as a missing convert-0000.png when it is opened below.
            subprocess.Popen((
                settings.CONVERT_BINARY,
                "-scale", "500x5000",
                "-alpha", "remove",
                orig_target,
                os.path.join(thumb_temp, "convert-%04d.png")
            )).wait()

            # Only the first rendered image is kept as the thumbnail.
            thumb_source = os.path.join(thumb_temp, "convert-0000.png")

            with open(thumb_source, "rb") as unencrypted, \
                    open(thumb_target, "wb") as encrypted:
                encrypted.write(GnuPG.encrypted(unencrypted))

        # Move the original only after its thumbnail was written, so a
        # failure above leaves the file in place for a re-run.
        shutil.move(orig_source, os.path.join(originals_dir, f))
def set_checksums(apps, schema_editor):
    """Fill in the ``checksum`` column for all documents, rejecting duplicates.

    Looks up the historical ``documents.Document`` model through ``apps`` and
    does nothing if it has no rows. Otherwise every row is wrapped in
    ``Document``, the decrypted contents of its source file are hashed with
    MD5, and the hex digest is stored on that row.

    Raises:
        RuntimeError: when a document hashes to a checksum already produced
            by an earlier document in the same run; the message lists both
            documents and how to delete the later one.

    NOTE(review): an earlier definition of ``set_checksums`` also appears in
    this module; this later one is the binding that takes effect -- confirm
    the earlier copy is not needed.
    """
    model = apps.get_model("documents", "Document")

    # Empty table: nothing to hash, and no banner either.
    if not model.objects.all().exists():
        return

    intro = (
        "\n\n"
        " This is a one-time only migration to generate checksums for all\n"
        " of your existing documents. If you have a lot of documents\n"
        " though, this may take a while, so a coffee break may be in\n"
        " order."
        "\n"
    )
    print(colourise(intro, opts=("bold",)))

    report = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}"

    # Maps each checksum to the (pk, file_name) of the document that owns it.
    owners = {}

    for row in model.objects.all():

        document = Document(row)

        bullet = colourise("*", fg="green")
        label = colourise("Generating a checksum for", fg="white")
        name = colourise(document.file_name, fg="cyan")
        print(" {} {} {}".format(bullet, label, name))

        with document.source_file as encrypted:
            plaintext = GnuPG.decrypted(encrypted)
            checksum = hashlib.md5(plaintext).hexdigest()

        if checksum in owners:
            owner_pk, owner_name = owners[checksum]
            sections = {
                "p1": colourise("It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:", fg="yellow"),
                "p2": colourise("To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:", fg="yellow"),
                "p3": colourise("When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.", fg="yellow"),
                "doc1": colourise(" * {} (id: {})".format(owner_name, owner_pk), fg="red"),
                "doc2": colourise(" * {} (id: {})".format(document.file_name, document.pk), fg="red"),
                "code": colourise(" $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(pk=document.pk), fg="green"),
                "line": colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
            }
            raise RuntimeError(report.format(**sections))

        owners[checksum] = (document.pk, document.file_name)

        # Queryset update: writes just the checksum for this primary key.
        model.objects.filter(pk=document.pk).update(checksum=checksum)