def render_to_response(self, context, **response_kwargs):
    """
    Serve the decrypted thumbnail or source file as a raw HTTP response.

    When the ``kind`` URL keyword argument equals ``"thumb"`` the decrypted
    thumbnail is returned with the PNG content type.  Otherwise the decrypted
    source file is returned as an attachment named after the document's
    ``file_name``, with a content type looked up from its ``file_type``.
    ``context`` and ``response_kwargs`` are accepted but not used.
    """
    mime_by_file_type = {
        Document.TYPE_PDF: "application/pdf",
        Document.TYPE_PNG: "image/png",
        Document.TYPE_JPG: "image/jpeg",
        Document.TYPE_GIF: "image/gif",
        Document.TYPE_TIF: "image/tiff",
    }

    wants_thumbnail = self.kwargs["kind"] == "thumb"
    if wants_thumbnail:
        thumbnail = GnuPG.decrypted(self.object.thumbnail_file)
        return HttpResponse(
            thumbnail, content_type=mime_by_file_type[Document.TYPE_PNG])

    payload = GnuPG.decrypted(self.object.source_file)
    response = HttpResponse(
        payload, content_type=mime_by_file_type[self.object.file_type])
    disposition = 'attachment; filename="{}"'.format(self.object.file_name)
    response["Content-Disposition"] = disposition
    return response
def _import_files_from_manifest(self):
    """
    Copy (or encrypt) the exported files of every document in the manifest.

    Manifest records whose ``model`` is not ``documents.document`` are
    skipped.  For each remaining record the document is loaded by primary
    key; documents stored as GPG get their source file and thumbnail
    encrypted into place, all others are copied verbatim.
    """
    for record in self.manifest:
        if record["model"] != "documents.document":
            continue

        doc_file = record[EXPORTER_FILE_NAME]
        thumb_file = record[EXPORTER_THUMBNAIL_NAME]
        document = Document.objects.get(pk=record["pk"])

        exported_document = os.path.join(self.source, doc_file)
        exported_thumbnail = os.path.join(self.source, thumb_file)

        if document.storage_type != Document.STORAGE_TYPE_GPG:
            shutil.copy(exported_document, document.source_path)
            shutil.copy(exported_thumbnail, document.thumbnail_path)
            continue

        with open(exported_document, "rb") as plain, \
                open(document.source_path, "wb") as cipher:
            print("Encrypting {} and saving it to {}".format(
                doc_file, document.source_path))
            cipher.write(GnuPG.encrypted(plain))

        with open(exported_thumbnail, "rb") as plain, \
                open(document.thumbnail_path, "wb") as cipher:
            print("Encrypting {} and saving it to {}".format(
                thumb_file, document.thumbnail_path))
            cipher.write(GnuPG.encrypted(plain))
def __gpg_to_unencrypted(passphrase):
    """
    Rewrite every GPG-stored document and thumbnail as unencrypted files.

    For each document the current paths are remembered, both files are
    decrypted with ``passphrase``, the storage type is switched to
    unencrypted, and the plain data is written to ``source_path`` /
    ``thumbnail_path`` as they read after the switch.  The remembered paths
    are unlinked last.

    NOTE(review): this relies on ``source_path``/``thumbnail_path`` changing
    with ``storage_type``; otherwise the freshly written files are the ones
    removed -- confirm against the model.
    """
    gpg_documents = Document.objects.filter(
        storage_type=Document.STORAGE_TYPE_GPG)

    for document in gpg_documents:
        message = "Decrypting {}".format(document)
        print(coloured(message, "green"))

        # Captured before the storage type changes below.
        stale_paths = [document.source_path, document.thumbnail_path]

        plain_source = GnuPG.decrypted(document.source_file, passphrase)
        plain_thumbnail = GnuPG.decrypted(document.thumbnail_file, passphrase)

        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED

        with open(document.source_path, "wb") as out:
            out.write(plain_source)
        with open(document.thumbnail_path, "wb") as out:
            out.write(plain_thumbnail)

        document.save(update_fields=("storage_type", ))

        for stale in stale_paths:
            os.unlink(stale)
def dump(self):
    """
    Export all documents and thumbnails to ``self.target`` with a manifest.

    Each document is written under a name built from its zero-padded primary
    key and ``file_name``; the thumbnail uses that name plus
    ``-thumbnail.png``.  GPG-stored files are decrypted on the way out, other
    files are copied.  The manifest (documents, correspondents, tags and
    document types) is written to ``manifest.json`` in the target folder.
    """
    documents = Document.objects.all()
    documents_by_pk = {doc.pk: doc for doc in documents}
    manifest = json.loads(serializers.serialize("json", documents))

    for entry in manifest:
        # Exported files are always plain, so the manifest says so; the
        # importer decides whether to encrypt again.
        entry["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED

        document = documents_by_pk[entry["pk"]]

        unique_filename = f"{document.pk:07}_{document.file_name}"
        file_target = os.path.join(self.target, unique_filename)
        thumbnail_name = unique_filename + "-thumbnail.png"
        thumbnail_target = os.path.join(self.target, thumbnail_name)

        entry[EXPORTER_FILE_NAME] = unique_filename
        entry[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

        print(f"Exporting: {file_target}")

        created_ts = int(time.mktime(document.created.timetuple()))
        stamp = (created_ts, created_ts)

        if document.storage_type != Document.STORAGE_TYPE_GPG:
            shutil.copy(document.source_path, file_target)
            shutil.copy(document.thumbnail_path, thumbnail_target)
            continue

        with open(file_target, "wb") as out:
            out.write(GnuPG.decrypted(document.source_file))
        # Stamp after closing so the final flush cannot reset the mtime.
        os.utime(file_target, times=stamp)

        with open(thumbnail_target, "wb") as out:
            out.write(GnuPG.decrypted(document.thumbnail_file))
        os.utime(thumbnail_target, times=stamp)

    for model in (Correspondent, Tag, DocumentType):
        manifest += json.loads(
            serializers.serialize("json", model.objects.all()))

    manifest_path = os.path.join(self.target, "manifest.json")
    with open(manifest_path, "w") as out:
        json.dump(manifest, out, indent=2)
def _store(self, text, pdf):
    """
    Create a ``Document`` for ``pdf`` and store an encrypted copy of it.

    Sender and title come from ``self._parse_file_name``; the created and
    modified times are taken from the file's mtime.  Every tag whose
    ``matches`` accepts the lower-cased text is attached, then the file is
    encrypted into ``doc.pdf_path``.
    """
    sender, title = self._parse_file_name(pdf)

    relevant_tags = []
    for tag in Tag.objects.all():
        if tag.matches(text.lower()):
            relevant_tags.append(tag)

    mtime = os.stat(pdf).st_mtime

    self._render(" Saving record to database", 2)

    file_time = timezone.make_aware(datetime.datetime.fromtimestamp(mtime))
    doc = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        created=file_time,
        modified=file_time,
    )

    if relevant_tags:
        slugs = [tag.slug for tag in relevant_tags]
        self._render(" Tagging with {}".format(", ".join(slugs)), 2)
        doc.tags.add(*relevant_tags)

    with open(pdf, "rb") as plain, open(doc.pdf_path, "wb") as cipher:
        self._render(" Encrypting", 3)
        cipher.write(GnuPG.encrypted(plain))
def dump(self):
    """
    Decrypt every document into ``self.target`` and write ``manifest.json``.

    Each document's manifest entry records the exported path under
    ``__exported_file_name__``.  Exported files get the document's creation
    time as access and modification time.  Correspondents and tags are
    appended to the manifest before it is written.
    """
    documents = Document.objects.all()
    documents_by_pk = {doc.pk: doc for doc in documents}
    manifest = json.loads(serializers.serialize("json", documents))

    for entry in manifest:
        document = documents_by_pk[entry["pk"]]

        destination = os.path.join(self.target, document.file_name)
        entry["__exported_file_name__"] = destination

        print("Exporting: {}".format(destination))

        with open(destination, "wb") as out:
            out.write(GnuPG.decrypted(document.source_file))

        # Stamp after closing so the final flush cannot reset the mtime.
        created_ts = int(time.mktime(document.created.timetuple()))
        os.utime(destination, times=(created_ts, created_ts))

    for model in (Correspondent, Tag):
        manifest += json.loads(
            serializers.serialize("json", model.objects.all()))

    manifest_path = os.path.join(self.target, "manifest.json")
    with open(manifest_path, "w") as out:
        json.dump(manifest, out, indent=2)
def _store(self, text, doc):
    """
    Create a ``Document`` for the file ``doc`` and store it encrypted.

    Sender, title, tags and file type are guessed from the file name; tags
    returned by ``Tag.match_all(text)`` are merged with them.  Created and
    modified times come from the file's mtime.  The file is then encrypted
    into ``document.source_path``.
    """
    sender, title, tags, file_type = self._guess_attributes_from_name(doc)

    matched = list(Tag.match_all(text))
    relevant_tags = set(matched + list(tags))

    mtime = os.stat(doc).st_mtime

    self.log("debug", "Saving record to database")

    file_time = timezone.make_aware(datetime.datetime.fromtimestamp(mtime))
    document = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        file_type=file_type,
        created=file_time,
        modified=file_time,
    )

    if relevant_tags:
        slugs = [tag.slug for tag in relevant_tags]
        self.log("debug", "Tagging with {}".format(", ".join(slugs)))
        document.tags.add(*relevant_tags)

    with open(doc, "rb") as plain, \
            open(document.source_path, "wb") as cipher:
        self.log("debug", "Encrypting")
        cipher.write(GnuPG.encrypted(plain))

    self.log("info", "Completed")
def _store(self, text, doc):
    """
    Create a ``Document`` for the file ``doc`` and store it encrypted.

    Sender, title, tags and file type are guessed from the file name.  Tags
    whose ``matches`` accepts the lower-cased text are merged with the
    guessed ones.  Created and modified times come from the file's mtime,
    and the file is encrypted into ``document.source_path``.
    """
    sender, title, tags, file_type = self._guess_attributes_from_name(doc)

    tags = list(tags)
    lower_text = text.lower()
    matched = []
    for tag in Tag.objects.all():
        if tag.matches(lower_text):
            matched.append(tag)
    relevant_tags = set(matched + tags)

    mtime = os.stat(doc).st_mtime

    Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)

    file_time = timezone.make_aware(datetime.datetime.fromtimestamp(mtime))
    document = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        file_type=file_type,
        created=file_time,
        modified=file_time,
    )

    if relevant_tags:
        slugs = [tag.slug for tag in relevant_tags]
        Log.debug(
            "Tagging with {}".format(", ".join(slugs)),
            Log.COMPONENT_CONSUMER)
        document.tags.add(*relevant_tags)

    with open(doc, "rb") as plain, \
            open(document.source_path, "wb") as cipher:
        Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
        cipher.write(GnuPG.encrypted(plain))
def dump(self):
    """
    Decrypt every document into ``self.target`` and write ``manifest.json``.

    The exported path of each document is stored in its manifest entry under
    ``__exported_file_name__`` and the exported file is stamped with the
    document's creation time.  Correspondent and tag records are appended to
    the manifest before it is saved.
    """
    documents = Document.objects.all()
    lookup = {item.pk: item for item in documents}
    manifest = json.loads(serializers.serialize("json", documents))

    for record in manifest:
        document = lookup[record["pk"]]

        export_path = os.path.join(self.target, document.file_name)
        record["__exported_file_name__"] = export_path

        print("Exporting: {}".format(export_path))

        with open(export_path, "wb") as handle:
            handle.write(GnuPG.decrypted(document.source_file))

        # Stamp after closing so the final flush cannot reset the mtime.
        seconds = int(time.mktime(document.created.timetuple()))
        os.utime(export_path, times=(seconds, seconds))

    correspondents = serializers.serialize("json", Correspondent.objects.all())
    manifest += json.loads(correspondents)
    tags = serializers.serialize("json", Tag.objects.all())
    manifest += json.loads(tags)

    with open(os.path.join(self.target, "manifest.json"), "w") as handle:
        json.dump(manifest, handle, indent=2)
def _store(self, text, pdf):
    """
    Save ``pdf`` as a new ``Document`` and write its encrypted copy.

    The sender and title are parsed from the file name, matching tags are
    found by testing each tag against the lower-cased text, and the file's
    mtime is used for both timestamps.  The encrypted file is written to
    ``doc.pdf_path``.
    """
    sender, title = self._parse_file_name(pdf)

    relevant_tags = list(
        filter(lambda tag: tag.matches(text.lower()), Tag.objects.all()))

    stats = os.stat(pdf)

    self._render(" Saving record to database", 2)

    timestamp = timezone.make_aware(
        datetime.datetime.fromtimestamp(stats.st_mtime))
    doc = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        created=timestamp,
        modified=timestamp,
    )

    if relevant_tags:
        tag_names = ", ".join(tag.slug for tag in relevant_tags)
        self._render(" Tagging with {}".format(tag_names), 2)
        doc.tags.add(*relevant_tags)

    with open(pdf, "rb") as source, open(doc.pdf_path, "wb") as target:
        self._render(" Encrypting", 3)
        target.write(GnuPG.encrypted(source))
def changed_password_check(app_configs, **kwargs):
    """
    Check that the configured passphrase can decrypt stored documents.

    Returns a list with one ``Error`` when an encrypted document exists but
    ``settings.PASSPHRASE`` is empty, or when decrypting the first encrypted
    document yields a falsy result.  Returns an empty list otherwise,
    including when the documents table cannot be queried yet.
    ``app_configs`` and ``kwargs`` are accepted but not used.
    """
    # Imported here rather than at module level, as in the original.
    from documents.models import Document
    from paperless.db import GnuPG

    try:
        encrypted_doc = Document.objects.filter(
            storage_type=Document.STORAGE_TYPE_GPG).first()
    except (OperationalError, ProgrammingError):
        return []  # No documents table yet

    if not encrypted_doc:
        return []

    if not settings.PASSPHRASE:
        return [
            Error(
                "The database contains encrypted documents but no password "
                "is set.")
        ]

    if not GnuPG.decrypted(encrypted_doc.source_file):
        # The message previously ended with a stray, unbalanced double quote.
        return [
            Error(
                textwrap.dedent("""
                The current password doesn't match the password of the
                existing documents.

                If you intend to change your password, you must first export
                all of the old documents, start fresh with the new password
                and then re-import them.
                """))
        ]

    return []
def _store(self, text, doc):
    """
    Save the file ``doc`` as a new ``Document`` and write it encrypted.

    Attributes are guessed from the file name and merged with the tags from
    ``Tag.match_all(text)``.  Both timestamps come from the file's mtime.
    The encrypted bytes are written to ``document.source_path``.
    """
    sender, title, tags, file_type = self._guess_attributes_from_name(doc)

    candidates = list(Tag.match_all(text)) + list(tags)
    relevant_tags = set(candidates)

    stats = os.stat(doc)

    Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)

    timestamp = timezone.make_aware(
        datetime.datetime.fromtimestamp(stats.st_mtime))
    document = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        file_type=file_type,
        created=timestamp,
        modified=timestamp,
    )

    if relevant_tags:
        tag_names = ", ".join(tag.slug for tag in relevant_tags)
        Log.debug("Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
        document.tags.add(*relevant_tags)

    with open(doc, "rb") as source, \
            open(document.source_path, "wb") as target:
        Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
        target.write(GnuPG.encrypted(source))
def _write(self, document, source, target):
    """
    Write ``source`` to ``target``, encrypting unless storage is unencrypted.

    If ``document.storage_type`` is unencrypted the bytes are copied as-is;
    otherwise a debug message is logged and the output of
    ``GnuPG.encrypted`` is written.
    """
    with open(source, "rb") as read_file, open(target, "wb") as write_file:
        if document.storage_type != Document.STORAGE_TYPE_UNENCRYPTED:
            self.log("debug", "Encrypting")
            payload = GnuPG.encrypted(read_file)
        else:
            payload = read_file.read()
        write_file.write(payload)
def _store(self, text, doc, thumbnail):
    """
    Create a ``Document`` for ``doc`` and store it and its thumbnail
    encrypted.

    Metadata comes from ``FileInfo.from_path``.  The creation time is the
    one from the file info, falling back to the file's mtime, and is also
    used as the modified time.  ``dated`` is extracted from the text and the
    checksum is the MD5 hex digest of the file.  Returns the new document.
    """
    file_info = FileInfo.from_path(doc)
    mtime = os.stat(doc).st_mtime

    self.log("debug", "Saving record to database")

    created = file_info.created
    if not created:
        created = timezone.make_aware(datetime.datetime.fromtimestamp(mtime))
    dated = self._extract_date(text)

    with open(doc, "rb") as raw:
        document = Document.objects.create(
            correspondent=file_info.correspondent,
            title=file_info.title,
            content=text,
            file_type=file_info.extension,
            checksum=hashlib.md5(raw.read()).hexdigest(),
            created=created,
            modified=created,
            dated=dated,
        )

    matched = list(Tag.match_all(text))
    relevant_tags = set(matched + list(file_info.tags))
    if relevant_tags:
        slugs = [tag.slug for tag in relevant_tags]
        self.log("debug", "Tagging with {}".format(", ".join(slugs)))
        document.tags.add(*relevant_tags)

    # Encrypt and store the actual document
    with open(doc, "rb") as plain, \
            open(document.source_path, "wb") as cipher:
        self.log("debug", "Encrypting the document")
        cipher.write(GnuPG.encrypted(plain))

    # Encrypt and store the thumbnail
    with open(thumbnail, "rb") as plain, \
            open(document.thumbnail_path, "wb") as cipher:
        self.log("debug", "Encrypting the thumbnail")
        cipher.write(GnuPG.encrypted(plain))

    self.log("info", "Completed")

    return document
def _import_files_from_manifest(self):
    """
    Put the exported files of every manifest document back into storage.

    Records whose ``model`` is not ``documents.document`` are skipped.  When
    ``settings.PASSPHRASE`` is set the exported source file and thumbnail
    are encrypted into place; otherwise they are copied with ``copy2``.
    Afterwards the ``storage_type`` of the imported documents is updated to
    match how they were written.
    """
    document_pks = []

    for record in self.manifest:
        if record["model"] != "documents.document":
            continue

        document_pks.append(record["pk"])

        doc_file = record[EXPORTER_FILE_NAME]
        thumb_file = record[EXPORTER_THUMBNAIL_NAME]
        document = Document.objects.get(pk=record["pk"])

        document_path = os.path.join(self.source, doc_file)
        thumbnail_path = os.path.join(self.source, thumb_file)

        if settings.PASSPHRASE:
            with open(document_path, "rb") as unencrypted, \
                    open(document.source_path, "wb") as encrypted:
                print("Encrypting {} and saving it to {}".format(
                    doc_file, document.source_path))
                encrypted.write(GnuPG.encrypted(unencrypted))

            with open(thumbnail_path, "rb") as unencrypted, \
                    open(document.thumbnail_path, "wb") as encrypted:
                print("Encrypting {} and saving it to {}".format(
                    thumb_file, document.thumbnail_path))
                encrypted.write(GnuPG.encrypted(unencrypted))
        else:
            shutil.copy2(document_path, document.source_path)
            shutil.copy2(thumbnail_path, document.thumbnail_path)

    # Reset the storage type to whatever we've used while importing
    storage_type = Document.STORAGE_TYPE_UNENCRYPTED
    if settings.PASSPHRASE:
        storage_type = Document.STORAGE_TYPE_GPG

    # Only pks of document records are used: the manifest also holds records
    # of other models whose pks could match documents that were not imported.
    Document.objects.filter(pk__in=document_pks).update(
        storage_type=storage_type)
def thumb(self, request, pk=None):
    """
    Return the thumbnail of document ``pk`` as a PNG response.

    GPG-stored thumbnails are passed through ``GnuPG.decrypted``; others are
    served from the thumbnail file directly.  Raises ``Http404`` when the
    document does not exist or a ``FileNotFoundError`` occurs.
    ``request`` is not used.
    """
    try:
        doc = Document.objects.get(id=pk)
        is_encrypted = doc.storage_type == Document.STORAGE_TYPE_GPG
        if is_encrypted:
            body = GnuPG.decrypted(doc.thumbnail_file)
        else:
            body = doc.thumbnail_file
        return HttpResponse(body, content_type='image/png')
    except (FileNotFoundError, Document.DoesNotExist):
        raise Http404()
def dump(self):
    """
    Export all documents and thumbnails to ``self.target`` with a manifest.

    Files keep the document's ``file_name``; thumbnails use that name plus
    ``-thumbnail.png``.  GPG-stored files are decrypted and stamped with the
    document's creation time, other files are copied.  Correspondents and
    tags are appended to the manifest, which is written to
    ``manifest.json``.
    """
    documents = Document.objects.all()
    documents_by_pk = {doc.pk: doc for doc in documents}
    manifest = json.loads(serializers.serialize("json", documents))

    for entry in manifest:
        document = documents_by_pk[entry["pk"]]

        file_target = os.path.join(self.target, document.file_name)
        thumbnail_name = document.file_name + "-thumbnail.png"
        thumbnail_target = os.path.join(self.target, thumbnail_name)

        entry[EXPORTER_FILE_NAME] = document.file_name
        entry[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

        print("Exporting: {}".format(file_target))

        created_ts = int(time.mktime(document.created.timetuple()))
        stamp = (created_ts, created_ts)

        if document.storage_type != Document.STORAGE_TYPE_GPG:
            shutil.copy(document.source_path, file_target)
            shutil.copy(document.thumbnail_path, thumbnail_target)
            continue

        with open(file_target, "wb") as out:
            out.write(GnuPG.decrypted(document.source_file))
        # Stamp after closing so the final flush cannot reset the mtime.
        os.utime(file_target, times=stamp)

        with open(thumbnail_target, "wb") as out:
            out.write(GnuPG.decrypted(document.thumbnail_file))
        os.utime(thumbnail_target, times=stamp)

    for model in (Correspondent, Tag):
        manifest += json.loads(
            serializers.serialize("json", model.objects.all()))

    manifest_path = os.path.join(self.target, "manifest.json")
    with open(manifest_path, "w") as out:
        json.dump(manifest, out, indent=2)
def _store(self, text, doc, thumbnail):
    """
    Create a ``Document`` for ``doc`` and store it and its thumbnail
    encrypted.

    Metadata comes from ``FileInfo.from_path``; both timestamps come from
    the file's mtime and the checksum is the MD5 hex digest of the file.
    Tags from ``Tag.match_all(text)`` and the file info are attached.
    Returns the new document.
    """
    file_info = FileInfo.from_path(doc)
    mtime = os.stat(doc).st_mtime

    self.log("debug", "Saving record to database")

    with open(doc, "rb") as raw:
        file_time = timezone.make_aware(
            datetime.datetime.fromtimestamp(mtime))
        document = Document.objects.create(
            correspondent=file_info.correspondent,
            title=file_info.title,
            content=text,
            file_type=file_info.extension,
            checksum=hashlib.md5(raw.read()).hexdigest(),
            created=file_time,
            modified=file_time,
        )

    matched = list(Tag.match_all(text))
    relevant_tags = set(matched + list(file_info.tags))
    if relevant_tags:
        slugs = [tag.slug for tag in relevant_tags]
        self.log("debug", "Tagging with {}".format(", ".join(slugs)))
        document.tags.add(*relevant_tags)

    # Encrypt and store the actual document
    with open(doc, "rb") as plain, \
            open(document.source_path, "wb") as cipher:
        self.log("debug", "Encrypting the document")
        cipher.write(GnuPG.encrypted(plain))

    # Encrypt and store the thumbnail
    with open(thumbnail, "rb") as plain, \
            open(document.thumbnail_path, "wb") as cipher:
        self.log("debug", "Encrypting the thumbnail")
        cipher.write(GnuPG.encrypted(plain))

    self.log("info", "Completed")

    return document
def render_to_response(self, context, **response_kwargs):
    """
    Return the decrypted PDF of ``self.object`` as an attachment.

    The download name is the slugified string form of the object with a
    ``.pdf`` suffix.  ``context`` and ``response_kwargs`` are not used.
    """
    pdf_bytes = GnuPG.decrypted(self.object.pdf)
    response = HttpResponse(pdf_bytes, content_type="application/pdf")

    download_name = slugify(str(self.object)) + ".pdf"
    response["Content-Disposition"] = (
        'attachment; filename="{}"'.format(download_name))
    return response
def render_to_response(self, context, **response_kwargs):
    """
    Send the object's PDF, passed through ``GnuPG.decrypted``, as a
    download.

    The ``Content-Disposition`` header names the file after the slugified
    string form of the object plus ``.pdf``.  Neither ``context`` nor
    ``response_kwargs`` is consulted.
    """
    body = GnuPG.decrypted(self.object.pdf)
    response = HttpResponse(body, content_type="application/pdf")

    slug = slugify(str(self.object))
    header = 'attachment; filename="{}"'.format(slug + ".pdf")
    response["Content-Disposition"] = header

    return response
def file_response(self, pk, disposition):
    """
    Build an HTTP response carrying the source file of document ``pk``.

    Unencrypted documents are served from their source file directly; any
    other storage type is passed through ``GnuPG.decrypted``.
    ``disposition`` is used verbatim as the ``Content-Disposition`` type in
    front of the document's file name.
    """
    doc = Document.objects.get(id=pk)

    is_plain = doc.storage_type == Document.STORAGE_TYPE_UNENCRYPTED
    body = doc.source_file if is_plain else GnuPG.decrypted(doc.source_file)

    response = HttpResponse(body, content_type=doc.mime_type)
    header = '{}; filename="{}"'.format(disposition, doc.file_name)
    response["Content-Disposition"] = header
    return response
def dump_legacy(self):
    """
    Decrypt every document into ``self.target`` under its legacy file name.

    The name comes from ``self._get_legacy_file_name`` and each exported
    file is stamped with the document's creation time.
    """
    for document in Document.objects.all():
        legacy_name = self._get_legacy_file_name(document)
        destination = os.path.join(self.target, legacy_name)

        print("Exporting: {}".format(destination))

        with open(destination, "wb") as out:
            out.write(GnuPG.decrypted(document.source_file))

        # Stamp after closing so the final flush cannot reset the mtime.
        created_ts = int(time.mktime(document.created.timetuple()))
        os.utime(destination, times=(created_ts, created_ts))
def dump_legacy(self):
    """
    Export all documents, decrypted, using the legacy naming scheme.

    Each file is written to ``self.target`` under the name returned by
    ``self._get_legacy_file_name`` and receives the document's creation
    time as its access and modification time.
    """
    for item in Document.objects.all():
        export_path = os.path.join(
            self.target, self._get_legacy_file_name(item))

        print("Exporting: {}".format(export_path))

        with open(export_path, "wb") as handle:
            handle.write(GnuPG.decrypted(item.source_file))

        # Stamp after closing so the final flush cannot reset the mtime.
        seconds = int(time.mktime(item.created.timetuple()))
        os.utime(export_path, times=(seconds, seconds))
def __unencrypted_to_gpg(passphrase):
    """
    Rewrite every unencrypted document and thumbnail as GPG-encrypted files.

    The plain files are opened first, then the storage type is switched to
    GPG and the encrypted output is written to ``source_path`` /
    ``thumbnail_path`` as they read after the switch.  The paths captured
    before the switch are unlinked last.

    NOTE(review): this relies on the paths changing with ``storage_type``;
    otherwise the inputs are truncated while still being read -- confirm
    against the model.
    """
    plain_documents = Document.objects.filter(
        storage_type=Document.STORAGE_TYPE_UNENCRYPTED)

    for document in plain_documents:
        message = "Encrypting {}".format(document)
        print(coloured(message, "green"))

        # Captured before the storage type changes below.
        stale_paths = [document.source_path, document.thumbnail_path]

        with open(document.source_path, "rb") as raw_document, \
                open(document.thumbnail_path, "rb") as raw_thumb:
            document.storage_type = Document.STORAGE_TYPE_GPG

            with open(document.source_path, "wb") as out:
                out.write(GnuPG.encrypted(raw_document, passphrase))
            with open(document.thumbnail_path, "wb") as out:
                out.write(GnuPG.encrypted(raw_thumb, passphrase))

        document.save(update_fields=("storage_type", ))

        for stale in stale_paths:
            os.unlink(stale)
def _import_files_from_manifest(self):
    """
    Encrypt the exported file of every manifest document into storage.

    Records whose ``model`` is not ``documents.document`` are skipped.  The
    exported file path is read from ``__exported_file_name__`` and the
    encrypted output is written to the document's ``source_path``.
    """
    for record in self.manifest:
        if record["model"] != "documents.document":
            continue

        doc_file = record["__exported_file_name__"]
        document = Document.objects.get(pk=record["pk"])

        with open(doc_file, "rb") as plain, \
                open(document.source_path, "wb") as cipher:
            print("Encrypting {} and saving it to {}".format(
                doc_file, document.source_path))
            cipher.write(GnuPG.encrypted(plain))
def _store(self, text, doc, thumbnail):
    """
    Create a ``Document`` for ``doc`` and store it and its thumbnail
    encrypted.

    Correspondent, title, tags and file type are guessed from the file name
    and merged with the tags from ``Tag.match_all(text)``.  Both timestamps
    come from the file's mtime.
    """
    sender, title, tags, file_type = self._guess_attributes_from_name(doc)

    matched = list(Tag.match_all(text))
    relevant_tags = set(matched + list(tags))

    mtime = os.stat(doc).st_mtime

    self.log("debug", "Saving record to database")

    file_time = timezone.make_aware(datetime.datetime.fromtimestamp(mtime))
    document = Document.objects.create(
        correspondent=sender,
        title=title,
        content=text,
        file_type=file_type,
        created=file_time,
        modified=file_time,
    )

    if relevant_tags:
        slugs = [tag.slug for tag in relevant_tags]
        self.log("debug", "Tagging with {}".format(", ".join(slugs)))
        document.tags.add(*relevant_tags)

    # Encrypt and store the actual document
    with open(doc, "rb") as plain, \
            open(document.source_path, "wb") as cipher:
        self.log("debug", "Encrypting the document")
        cipher.write(GnuPG.encrypted(plain))

    # Encrypt and store the thumbnail
    with open(thumbnail, "rb") as plain, \
            open(document.thumbnail_path, "wb") as cipher:
        self.log("debug", "Encrypting the thumbnail")
        cipher.write(GnuPG.encrypted(plain))

    self.log("info", "Completed")
def __gpg_to_unencrypted(passphrase):
    """
    Rewrite every GPG-stored document and thumbnail as unencrypted files.

    Both files are decrypted with ``passphrase``, the storage type is
    switched, and the ``.gpg`` extension is stripped from
    ``document.filename``.  The plain data is written to the paths as they
    read afterwards, the database row is updated, and the paths captured at
    the start are unlinked.

    Raises ``CommandError`` when the stored filename does not end with
    ``.gpg``; this check runs after decryption and before any file is
    written.
    """
    gpg_documents = Document.objects.filter(
        storage_type=Document.STORAGE_TYPE_GPG)

    for document in gpg_documents:
        message = "Decrypting {}".format(document).encode('utf-8')
        print(coloured(message, "green"))

        # Captured before storage type and filename change below.
        stale_paths = [document.source_path, document.thumbnail_path]

        plain_source = GnuPG.decrypted(document.source_file, passphrase)
        plain_thumbnail = GnuPG.decrypted(document.thumbnail_file, passphrase)

        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED

        stem, ext = os.path.splitext(document.filename)
        if ext != '.gpg':
            raise CommandError(
                f"Abort: encrypted file {document.source_path} does not "
                f"end with .gpg")

        document.filename = stem

        with open(document.source_path, "wb") as out:
            out.write(plain_source)
        with open(document.thumbnail_path, "wb") as out:
            out.write(plain_thumbnail)

        Document.objects.filter(id=document.id).update(
            storage_type=document.storage_type,
            filename=document.filename)

        for stale in stale_paths:
            os.unlink(stale)
def add_mime_types(apps, schema_editor):
    """
    Fill in ``mime_type`` for every document by sniffing its source file.

    GPG-stored files are passed through ``GnuPG.decrypted`` and the whole
    result is handed to ``magic.from_buffer``; for other files only the
    first 1024 bytes are read.  ``schema_editor`` is not used.
    """
    Document = apps.get_model("documents", "Document")

    for d in Document.objects.all():
        # The context manager closes the file even if decryption fails; the
        # previous explicit close() was skipped on any exception.
        with open(source_path(d), "rb") as f:
            if d.storage_type == STORAGE_TYPE_GPG:
                data = GnuPG.decrypted(f)
            else:
                data = f.read(1024)

        d.mime_type = magic.from_buffer(data, mime=True)
        d.save()
def file_response(self, pk, request, disposition):
    """
    Build an HTTP response carrying the archive or original of document
    ``pk``.

    The archived version is served as ``application/pdf`` when the original
    was not requested and the archive file exists on disk; otherwise the
    source file is served with the document's own MIME type.  GPG-stored
    files are passed through ``GnuPG.decrypted`` first.  ``disposition`` is
    used verbatim as the ``Content-Disposition`` type.
    """
    doc = Document.objects.get(id=pk)

    serve_archive = (
        not self.original_requested(request)
        and os.path.isfile(doc.archive_path)
    )
    if serve_archive:
        body = doc.archive_file
        public_name = doc.get_public_filename(archive=True)
        content_type = 'application/pdf'
    else:
        body = doc.source_file
        public_name = doc.get_public_filename()
        content_type = doc.mime_type

    if doc.storage_type == Document.STORAGE_TYPE_GPG:
        body = GnuPG.decrypted(body)

    response = HttpResponse(body, content_type=content_type)
    header = '{}; filename="{}"'.format(disposition, public_name)
    response["Content-Disposition"] = header
    return response
def _store(self, text, pdf):
    """
    Create a ``Document`` for ``pdf`` and store an encrypted copy of it.

    Sender and title are parsed from the file name and both timestamps come
    from the file's mtime.  The encrypted output is written to
    ``doc.pdf_path``.
    """
    sender, title = self._parse_file_name(pdf)
    mtime = os.stat(pdf).st_mtime

    self._render(" Saving record to database", 2)

    file_time = timezone.make_aware(datetime.datetime.fromtimestamp(mtime))
    doc = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        created=file_time,
        modified=file_time,
    )

    with open(pdf, "rb") as plain, open(doc.pdf_path, "wb") as cipher:
        self._render(" Encrypting", 3)
        cipher.write(GnuPG.encrypted(plain))
def _store(self, text, pdf):
    """
    Save ``pdf`` as a new ``Document`` and write its encrypted copy.

    The file name supplies sender and title, the file's mtime supplies both
    timestamps, and the encrypted bytes go to ``doc.pdf_path``.
    """
    sender, title = self._parse_file_name(pdf)

    stats = os.stat(pdf)

    self._render(" Saving record to database", 2)

    timestamp = timezone.make_aware(
        datetime.datetime.fromtimestamp(stats.st_mtime))
    fields = {
        "sender": sender,
        "title": title,
        "content": text,
        "created": timestamp,
        "modified": timestamp,
    }
    doc = Document.objects.create(**fields)

    with open(pdf, "rb") as source, open(doc.pdf_path, "wb") as target:
        self._render(" Encrypting", 3)
        target.write(GnuPG.encrypted(source))
def handle(self, *args, **options):
    """
    Export every document, decrypted, into the ``target`` directory.

    Raises ``CommandError`` when the target does not exist or is not
    writable.  If no passphrase is configured it is read from standard
    input and stored on ``settings``.  Files are named after each
    document's ``parseable_file_name``.
    """
    self.verbosity = options["verbosity"]
    self.target = options["target"]

    target_exists = os.path.exists(self.target)
    if not target_exists:
        raise CommandError("That path doesn't exist")

    target_writable = os.access(self.target, os.W_OK)
    if not target_writable:
        raise CommandError("That path doesn't appear to be writable")

    if not settings.PASSPHRASE:
        settings.PASSPHRASE = input("Please enter the passphrase: ")

    for document in Document.objects.all():
        destination = os.path.join(
            self.target, document.parseable_file_name)

        self._render("Exporting: {}".format(destination), 1)

        with open(destination, "wb") as out:
            out.write(GnuPG.decrypted(document.pdf))
def handle(self, *args, **options):
    """
    Export every document, decrypted, into the ``target`` directory.

    Raises ``CommandError`` when the target does not exist or is not
    writable.  If no passphrase is configured it is read from standard
    input and stored on ``settings``.  Files are named after each
    document's ``file_name`` and stamped with its creation time.
    """
    self.verbosity = options["verbosity"]
    self.target = options["target"]

    target_exists = os.path.exists(self.target)
    if not target_exists:
        raise CommandError("That path doesn't exist")

    target_writable = os.access(self.target, os.W_OK)
    if not target_writable:
        raise CommandError("That path doesn't appear to be writable")

    if not settings.PASSPHRASE:
        settings.PASSPHRASE = input("Please enter the passphrase: ")

    for document in Document.objects.all():
        destination = os.path.join(self.target, document.file_name)

        print("Exporting: {}".format(destination))

        with open(destination, "wb") as out:
            out.write(GnuPG.decrypted(document.source_file))

        # Stamp after closing so the final flush cannot reset the mtime.
        created_ts = int(time.mktime(document.created.timetuple()))
        os.utime(destination, times=(created_ts, created_ts))
def dump(self):
    """
    Export all documents, thumbnails and archives to ``self.target``.

    Steps:

    1. Record which files already exist below the target folder.
    2. Build the manifest (correspondents, tags, document types, documents)
       inside one transaction.
    3. For each document pick a base name not yet in
       ``self.exported_files``, record the exported names in the manifest
       and write the files.  GPG-stored files are decrypted and stamped with
       the creation time; others go through ``self.check_and_copy``.
    4. Write ``manifest.json``.
    5. If ``self.delete`` is set, remove every file still listed in
       ``self.files_in_export_dir`` and prune emptied directories.
    """
    # 1. Take a snapshot of what files exist in the current export folder
    for root, dirs, files in os.walk(self.target):
        self.files_in_export_dir.extend(
            os.path.abspath(os.path.join(root, name)) for name in files)

    # 2. Create manifest, containing all correspondents, types, tags and
    #    documents
    with transaction.atomic():
        manifest = json.loads(
            serializers.serialize("json", Correspondent.objects.all()))
        manifest += json.loads(
            serializers.serialize("json", Tag.objects.all()))
        manifest += json.loads(
            serializers.serialize("json", DocumentType.objects.all()))

        documents = Document.objects.order_by("id")
        document_map = {d.pk: d for d in documents}
        document_manifest = json.loads(
            serializers.serialize("json", documents))
        manifest += document_manifest

    # 3. Export files from each document
    for document_dict in tqdm.tqdm(document_manifest,
                                   total=len(document_manifest)):
        # 3.1. store files unencrypted
        document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501

        document = document_map[document_dict["pk"]]

        # 3.2. generate a unique filename
        filename_counter = 0
        while True:
            if self.use_filename_format:
                base_name = generate_filename(
                    document, counter=filename_counter, append_gpg=False)
            else:
                base_name = document.get_public_filename(
                    counter=filename_counter)

            if base_name not in self.exported_files:
                self.exported_files.append(base_name)
                break
            filename_counter += 1

        # 3.3. write filenames into manifest
        original_name = base_name
        original_target = os.path.join(self.target, original_name)
        document_dict[EXPORTER_FILE_NAME] = original_name

        thumbnail_name = base_name + "-thumbnail.png"
        thumbnail_target = os.path.join(self.target, thumbnail_name)
        document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

        if os.path.exists(document.archive_path):
            archive_name = base_name + "-archive.pdf"
            archive_target = os.path.join(self.target, archive_name)
            document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
        else:
            archive_target = None

        # 3.4. write files to target folder
        t = int(time.mktime(document.created.timetuple()))
        if document.storage_type == Document.STORAGE_TYPE_GPG:
            os.makedirs(os.path.dirname(original_target), exist_ok=True)
            with open(original_target, "wb") as f:
                f.write(GnuPG.decrypted(document.source_file))
            # Stamp after closing so the final flush cannot reset the mtime.
            os.utime(original_target, times=(t, t))

            os.makedirs(os.path.dirname(thumbnail_target), exist_ok=True)
            with open(thumbnail_target, "wb") as f:
                f.write(GnuPG.decrypted(document.thumbnail_file))
            os.utime(thumbnail_target, times=(t, t))

            if archive_target:
                os.makedirs(os.path.dirname(archive_target), exist_ok=True)
                # GnuPG.decrypted is given open file handles everywhere
                # else; archive_path is a path, so open it first.
                with open(document.archive_path, "rb") as encrypted_archive:
                    with open(archive_target, "wb") as f:
                        f.write(GnuPG.decrypted(encrypted_archive))
                os.utime(archive_target, times=(t, t))
        else:
            self.check_and_copy(document.source_path,
                                document.checksum,
                                original_target)

            self.check_and_copy(document.thumbnail_path,
                                None,
                                thumbnail_target)

            if archive_target:
                self.check_and_copy(document.archive_path,
                                    document.archive_checksum,
                                    archive_target)

    # 4. write manifest to target folder
    manifest_path = os.path.abspath(
        os.path.join(self.target, "manifest.json"))

    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)

    if self.delete:
        # 5. Remove files which we did not explicitly export in this run
        if manifest_path in self.files_in_export_dir:
            self.files_in_export_dir.remove(manifest_path)

        for f in self.files_in_export_dir:
            os.remove(f)

            delete_empty_directories(os.path.abspath(os.path.dirname(f)),
                                     os.path.abspath(self.target))
def _get_raw_data(self, file_handle):
    """
    Return ``file_handle`` itself for unencrypted objects, otherwise the
    result of passing it through ``GnuPG.decrypted``.
    """
    is_encrypted = (
        self.object.storage_type != Document.STORAGE_TYPE_UNENCRYPTED)
    if is_encrypted:
        return GnuPG.decrypted(file_handle)
    return file_handle
def dump(self):
    """
    Export all documents, thumbnails and archives to ``self.target``.

    For each document the first public filename that does not yet exist in
    the target folder is used.  The thumbnail uses that name plus
    ``-thumbnail.png`` and an archive, if present on disk, gets its own
    public filename with the ``_archive`` suffix.  GPG-stored files are
    decrypted and stamped with the document's creation time; others are
    copied.  The manifest (documents, correspondents, tags and document
    types) is written to ``manifest.json``.
    """
    documents = Document.objects.all()
    document_map = {d.pk: d for d in documents}
    manifest = json.loads(serializers.serialize("json", documents))

    for document_dict in manifest:
        # Force output to unencrypted as that will be the current state.
        # The importer will make the decision to encrypt or not.
        document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501

        document = document_map[document_dict["pk"]]

        print(f"Exporting: {document}")

        # Bump the counter until the name is free in the target folder.
        filename_counter = 0
        while True:
            original_name = document.get_public_filename(
                counter=filename_counter)
            original_target = os.path.join(self.target, original_name)

            if not os.path.exists(original_target):
                break
            filename_counter += 1

        thumbnail_name = original_name + "-thumbnail.png"
        thumbnail_target = os.path.join(self.target, thumbnail_name)

        document_dict[EXPORTER_FILE_NAME] = original_name
        document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

        if os.path.exists(document.archive_path):
            archive_name = document.get_public_filename(
                archive=True, counter=filename_counter, suffix="_archive")
            archive_target = os.path.join(self.target, archive_name)
            document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
        else:
            archive_target = None

        t = int(time.mktime(document.created.timetuple()))
        if document.storage_type == Document.STORAGE_TYPE_GPG:
            with open(original_target, "wb") as f:
                f.write(GnuPG.decrypted(document.source_file))
            # Stamp after closing so the final flush cannot reset the mtime.
            os.utime(original_target, times=(t, t))

            with open(thumbnail_target, "wb") as f:
                f.write(GnuPG.decrypted(document.thumbnail_file))
            os.utime(thumbnail_target, times=(t, t))

            if archive_target:
                # GnuPG.decrypted is given open file handles everywhere
                # else; archive_path is a path, so open it first.
                with open(document.archive_path, "rb") as encrypted_archive:
                    with open(archive_target, "wb") as f:
                        f.write(GnuPG.decrypted(encrypted_archive))
                os.utime(archive_target, times=(t, t))
        else:
            shutil.copy(document.source_path, original_target)
            shutil.copy(document.thumbnail_path, thumbnail_target)

            if archive_target:
                shutil.copy(document.archive_path, archive_target)

    manifest += json.loads(
        serializers.serialize("json", Correspondent.objects.all()))

    manifest += json.loads(serializers.serialize(
        "json", Tag.objects.all()))

    manifest += json.loads(serializers.serialize(
        "json", DocumentType.objects.all()))

    with open(os.path.join(self.target, "manifest.json"), "w") as f:
        json.dump(manifest, f, indent=2)