def set_checksums(apps, schema_editor):
    """Generate and store an MD5 checksum for every existing document.

    Each row of the ``documents.Document`` model is wrapped in ``Document``,
    its source file is passed through ``GnuPG.decrypted`` and the MD5 hex
    digest of the result is written to the row's ``checksum`` column.

    Args:
        apps: Registry used to look up the ``documents.Document`` model.
        schema_editor: Not used by this function.

    Raises:
        RuntimeError: If two documents produce the same checksum.  Rows
            processed before the duplicate was found have already been
            updated at that point.
    """
    document_model = apps.get_model("documents", "Document")

    has_documents = document_model.objects.all().exists()
    if not has_documents:
        return

    banner = (
        "\n\n"
        "  This is a one-time only migration to generate checksums for all\n"
        "  of your existing documents.  If you have a lot of documents\n"
        "  though, this may take a while, so a coffee break may be in\n"
        "  order."
        "\n"
    )
    print(colourise(banner, opts=("bold", )))

    # Maps checksum -> (pk, file_name) of the first document that produced it.
    seen = {}
    for record in document_model.objects.all():

        doc = Document(record)

        progress = "    {} {} {}".format(
            colourise("*", fg="green"),
            colourise("Generating a checksum for", fg="white"),
            colourise(doc.file_name, fg="cyan"),
        )
        print(progress)

        with doc.source_file as encrypted:
            plaintext = GnuPG.decrypted(encrypted)
            digest = hashlib.md5(plaintext).hexdigest()

        # Stored values are always tuples, so ``None`` reliably means
        # "this checksum has not been seen before".
        earlier = seen.get(digest)
        if earlier is not None:
            earlier_pk, earlier_name = earlier

            rule = colourise(
                "\n{}\n".format("=" * 80), fg="white", opts=("bold", ))
            problem = colourise(
                "It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97).  The documents in question\nare:",
                fg="yellow")
            remedy = colourise(
                "To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
                fg="yellow")
            follow_up = colourise(
                "When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
                fg="yellow")
            first_entry = colourise(
                "  * {} (id: {})".format(earlier_name, earlier_pk), fg="red")
            second_entry = colourise(
                "  * {} (id: {})".format(doc.file_name, doc.pk), fg="red")
            command = colourise(
                "  $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell"
                .format(pk=doc.pk),
                fg="green")

            message = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
                line=rule,
                p1=problem,
                doc1=first_entry,
                doc2=second_entry,
                p2=remedy,
                code=command,
                p3=follow_up,
            )
            raise RuntimeError(message)

        seen[digest] = (doc.pk, doc.file_name)

        document_model.objects.filter(pk=doc.pk).update(checksum=digest)
def move_documents_and_create_thumbnails(apps, schema_editor):
    """Create a thumbnail for each stored document and move it to ``originals``.

    Every entry of ``<MEDIA_ROOT>/documents`` whose name ends in ``gpg`` is
    passed through ``GnuPG.decrypted`` into a scratch directory, rendered to
    PNG with ``settings.CONVERT_BINARY``, and the first rendered image is
    written (through ``GnuPG.encrypted``) to ``documents/thumbnails``.  The
    source file is then moved to ``documents/originals``.

    Nothing is done when the ``documents`` directory contains exactly the
    entries ``originals`` and ``thumbnails``.

    Args:
        apps: Not used by this function.
        schema_editor: Not used by this function.
    """
    documents_dir = os.path.join(settings.MEDIA_ROOT, "documents")
    originals_dir = os.path.join(documents_dir, "originals")
    thumbnails_dir = os.path.join(documents_dir, "thumbnails")

    documents = os.listdir(documents_dir)

    if set(documents) == {"originals", "thumbnails"}:
        return

    print(colourise(
        "\n\n"
        "  This is a one-time only migration to generate thumbnails for all of your\n"
        "  documents so that future UIs will have something to work with.  If you have\n"
        "  a lot of documents though, this may take a while, so a coffee break may be\n"
        "  in order."
        "\n", opts=("bold",)
    ))

    # The loop writes into both sub-directories; make sure they exist so the
    # first thumbnail write / move does not fail on a fresh layout.
    os.makedirs(originals_dir, exist_ok=True)
    os.makedirs(thumbnails_dir, exist_ok=True)

    for f in sorted(documents):

        if not f.endswith("gpg"):
            continue

        print("    {} {} {}".format(
            colourise("*", fg="green"),
            colourise("Generating a thumbnail for", fg="white"),
            colourise(f, fg="cyan")
        ))

        orig_source = os.path.join(documents_dir, f)
        thumb_target = os.path.join(
            thumbnails_dir,
            re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
        )

        # Context managers guarantee the scratch directories are removed even
        # when decrypting, converting or encrypting raises.
        with tempfile.TemporaryDirectory(
                prefix="paperless", dir=settings.SCRATCH_DIR) as thumb_temp:
            with tempfile.TemporaryDirectory(
                    prefix="paperless", dir=settings.SCRATCH_DIR) as orig_temp:

                orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))

                with open(orig_source, "rb") as encrypted:
                    with open(orig_target, "wb") as unencrypted:
                        unencrypted.write(GnuPG.decrypted(encrypted))

                # NOTE: the exit status is not checked here; a failed
                # conversion surfaces below when the expected output file
                # cannot be opened.
                subprocess.Popen((
                    settings.CONVERT_BINARY,
                    "-scale", "500x5000",
                    "-alpha", "remove",
                    orig_target,
                    os.path.join(thumb_temp, "convert-%04d.png")
                )).wait()

                # Only the first image produced by the converter is kept.
                thumb_source = os.path.join(thumb_temp, "convert-0000.png")
                with open(thumb_source, "rb") as unencrypted:
                    with open(thumb_target, "wb") as encrypted:
                        encrypted.write(GnuPG.encrypted(unencrypted))

        # Move the source file only after its thumbnail was written.
        shutil.move(orig_source, os.path.join(originals_dir, f))
def set_checksums(apps, schema_editor):
    """Populate the ``checksum`` column of every ``documents.Document`` row.

    For each row, the document's source file is run through
    ``GnuPG.decrypted`` and the MD5 hex digest of the result is saved on the
    row.  Progress is printed per document.

    Args:
        apps: Registry providing ``get_model`` for the ``documents`` app.
        schema_editor: Not used by this function.

    Raises:
        RuntimeError: When a checksum has already been produced by an earlier
            document in the same run; the message names both documents and
            the command suggested for deleting the later one.
    """
    model = apps.get_model("documents", "Document")

    if not model.objects.all().exists():
        return

    notice = "".join((
        "\n\n",
        "  This is a one-time only migration to generate checksums for all\n",
        "  of your existing documents.  If you have a lot of documents\n",
        "  though, this may take a while, so a coffee break may be in\n",
        "  order.",
        "\n",
    ))
    print(colourise(notice, opts=("bold",)))

    # checksum -> (pk, file_name) of the document that first produced it
    known = {}
    for entry in model.objects.all():

        wrapped = Document(entry)

        bullet = colourise("*", fg="green")
        label = colourise("Generating a checksum for", fg="white")
        shown_name = colourise(wrapped.file_name, fg="cyan")
        print("    {} {} {}".format(bullet, label, shown_name))

        with wrapped.source_file as encrypted:
            checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()

        if checksum not in known:
            # First time this content is seen: remember it and persist.
            known[checksum] = (wrapped.pk, wrapped.file_name)
            model.objects.filter(pk=wrapped.pk).update(checksum=checksum)
            continue

        # Duplicate content: assemble the explanatory message and abort.
        first_pk, first_name = known[checksum]
        parts = {
            "p1": colourise(
                "It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97).  The documents in question\nare:",
                fg="yellow",
            ),
            "p2": colourise(
                "To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
                fg="yellow",
            ),
            "p3": colourise(
                "When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
                fg="yellow",
            ),
            "doc1": colourise(
                "  * {} (id: {})".format(first_name, first_pk),
                fg="red",
            ),
            "doc2": colourise(
                "  * {} (id: {})".format(wrapped.file_name, wrapped.pk),
                fg="red",
            ),
            "code": colourise(
                "  $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(pk=wrapped.pk),
                fg="green",
            ),
            "line": colourise(
                "\n{}\n".format("=" * 80),
                fg="white",
                opts=("bold",),
            ),
        }
        raise RuntimeError(
            "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(**parts)
        )