Beispiel #1
0
    def test_browse(self):
        """
        The browse view lists only documents the logged-in user may read.

        Two root-level documents are created, one owned by margaret and
        one owned by testcase_user. Margaret then requests the browse
        view and must get back exactly her own document.
        """
        # (title, file name, owner) of the documents to create, in order.
        fixtures = (
            ("document_M", "document_M.pdf", self.margaret_user),
            ("document_X", "document_X.pdf", self.testcase_user),
        )
        for title, file_name, owner in fixtures:
            Document.create_document(
                title=title,
                file_name=file_name,
                size='36',
                lang='DEU',
                user=owner,
                page_count=3,
                parent_id=None
            )

        self.client.login(testcase_user=self.margaret_user)
        browse_url = reverse('core:browse')
        response = self.client.get(
            browse_url,
            content_type='application/json',
            HTTP_X_REQUESTED_WITH='XMLHttpRequest',
        )
        self.assertEqual(response.status_code, 200)

        payload = json.loads(response.content)
        # Margaret sees a single node: the document she owns.
        self.assertEqual(len(payload['nodes']), 1)
        self.assertEqual(payload['nodes'][0]['title'], 'document_M')
Beispiel #2
0
    def test_normalize_doc_title(self):
        """
        After normalization each page carries the document title in
        ``norm_doc_title`` and a page search for a title keyword matches
        every page of the document.
        """
        document = Document.create_document(
            title="kyuss.pdf",
            user=self.user,
            lang="ENG",
            file_name="kyuss.pdf",
            size=1222,
            page_count=3
        )
        document.save()

        # Invoke the normalization directly to simulate a signal trigger.
        normalize_pages(document)

        page = document.pages.first()
        self.assertEqual(page.norm_doc_title, "kyuss.pdf")

        # The document has 3 pages and each one matches the keyword
        # through its normalized page.norm_doc_title.
        matches = Page.objects.search("kyuss")
        self.assertEqual(matches.count(), 3)
Beispiel #3
0
 def get_whatever_doc(self):
     """
     Create and return a three-page "kyuss.pdf" document owned by
     ``self.user``.
     """
     attrs = {
         'title': "kyuss.pdf",
         'user': self.user,
         'lang': "ENG",
         'file_name': "kyuss.pdf",
         'size': 1222,
         'page_count': 3,
     }
     return Document.create_document(**attrs)
Beispiel #4
0
    def test_delete_folder_with_document(self):
        """
        recursive_delete() on a folder leaves no tree node behind.

        Folder A initially contains document.pdf; folder B is then moved
        into A, so A has two descendants before it is deleted.
        """
        outer = Folder.objects.create(title="A", user=self.user)
        inner = Folder.objects.create(title="B", user=self.user)

        document = Document.create_document(
            title="document.pdf",
            file_name="document.pdf",
            size='1212',
            lang='DEU',
            user=self.user,
            parent_id=outer.id,
            page_count=5,
        )
        document.save()

        BaseTreeNode.objects.move_node(inner, outer)
        outer.refresh_from_db()

        # Folder A is now expected to hold two descendants:
        # folder B and document.pdf.
        descendants = outer.get_descendants(include_self=False)
        self.assertEqual(2, descendants.count())

        recursive_delete(outer)

        # Nothing may be left in the tree after the recursive delete.
        self.assertEqual(0, BaseTreeNode.objects.count())
 def test_preview(self):
     """
     Requesting the preview of page 1 answers with HTTP 200 and the
     image of that page exists in the default storage afterwards.
     """
     doc = Document.create_document(
         title="berlin.pdf",
         user=self.testcase_user,
         lang="ENG",
         file_name="berlin.pdf",
         size=1222,
         page_count=3
     )
     # Put the sample PDF where the document's path points to.
     sample_pdf = os.path.join(BASE_DIR, "data", "berlin.pdf")
     default_storage.copy_doc(src=sample_pdf, dst=doc.path.url())

     preview_url = reverse('core:preview', args=(doc.id, 1, 1))
     response = self.client.post(preview_url)
     self.assertEqual(response.status_code, 200)

     first_page = PagePath(
         document_path=doc.path,
         page_num=1,
         step=Step(1),
         page_count=3
     )
     image_file = default_storage.abspath(first_page.img_url())
     self.assertTrue(os.path.exists(image_file))
    def test_deny_change_for_restricted_document(self):
        """
        Only users holding PERM_WRITE on a document may change it.

        testcase_user owns berlin.pdf. Margaret, who was given no access
        to it, tries to PATCH its notes; the request must be rejected as
        forbidden and the stored notes must remain unchanged.
        """
        initial_notes = "Margaret, stay away!"
        pdf_path = os.path.join(BASE_DIR, "data", "berlin.pdf")

        doc = Document.create_document(
            user=self.testcase_user,
            title='berlin.pdf',
            size=os.path.getsize(pdf_path),
            lang='deu',
            notes=initial_notes,
            file_name='berlin.pdf',
            page_count=3
        )
        self.assertEqual(Document.objects.count(), 1)

        # Margaret does not have access to document berlin.pdf.
        self.client.login(testcase_user=self.margaret_user)

        body = json.dumps({'notes': "It works!"})
        response = self.client.patch(
            reverse('core:document', args=(doc.id, )),
            body,
            content_type='application/json',
            HTTP_X_REQUESTED_WITH='XMLHttpRequest',
        )
        self.assertEqual(
            response.status_code,
            HttpResponseForbidden.status_code
        )

        # The rejected request must not have altered the document.
        doc.refresh_from_db()
        self.assertEqual(doc.notes, initial_notes)
    def test_allow_view_if_user_has_perm(self):
        """
        A user who was granted READ permission on a document can view it.

        testcase_user owns berlin.pdf; margaret receives a non-inherited
        ALLOW access entry with the READ permission and her GET request
        for the document must be answered with HTTP 200.
        """
        pdf_path = os.path.join(BASE_DIR, "data", "berlin.pdf")

        doc = Document.create_document(
            user=self.testcase_user,
            title='berlin.pdf',
            size=os.path.getsize(pdf_path),
            lang='deu',
            file_name='berlin.pdf',
            page_count=3
        )

        # Allow margaret to read/view the document.
        granted = {READ: True}
        create_access(
            node=doc,
            name=self.margaret_user.username,
            model_type=Access.MODEL_USER,
            access_type=Access.ALLOW,
            access_inherited=False,
            permissions=granted
        )

        # Margaret now holds read access to berlin.pdf.
        self.client.login(testcase_user=self.margaret_user)

        response = self.client.get(
            reverse('core:document', args=(doc.id, ))
        )
        self.assertEqual(response.status_code, 200)
Beispiel #8
0
    def test_assign_tags_from_automate_instance(self):
        """
        Tags attached to an automate instance can be assigned to a
        document via ``doc.add_tags``.

        After the assignment the document must carry exactly the two
        tags of the automate, compared by tag name.
        """
        doc = Document.create_document(
            title="document_c",
            file_name="document_c.pdf",
            size='1212',
            lang='DEU',
            user=self.user,
            page_count=5,
        )
        doc.save()

        dst_folder = Folder.objects.create(title="destination Folder",
                                           user=self.user)

        auto = Automate.objects.create(
            name="whatever",
            match="XYZ",
            matching_algorithm=Automate.MATCH_ALL,
            is_case_sensitive=False,  # i.e. ignore case
            user=self.user,
            dst_folder=dst_folder)

        auto.tags.add('invoice', 'tags', tag_kwargs={'user': self.user})

        doc.add_tags(auto.tags.all())

        # assertEqual is used instead of the deprecated assertEquals
        # alias, which no longer exists in Python 3.12+.
        self.assertEqual(doc.tags.count(), 2)

        self.assertEqual(
            {tag.name for tag in doc.tags.all()},
            {tag.name for tag in auto.tags.all()}
        )
Beispiel #9
0
    def test_search_is_not_case_sensitive(self):
        """
        Unit test verifying that search is NOT case sensitive by default.

        The first page of a document gets the text
        "search for TESTX text"; the search backend is then queried with
        several keywords and the number of matching pages is checked.
        """
        backend = get_search_backend()

        doc = Document.create_document(
            title="document_c",
            file_name="document_c.pdf",
            size='1212',
            lang='DEU',
            user=self.user,
            page_count=5,
        )

        first_page = doc.pages.first()
        first_page.text = "search for TESTX text"
        first_page.save()

        # keyword -> expected number of matching pages:
        #   "TESTX" matches with exact case,
        #   "testX" matches with a lower/upper case mix,
        #   "tst" does not match at all.
        expectations = (
            ("TESTX", 1),
            ("testX", 1),
            ("tst", 0),
        )
        for keyword, expected_hits in expectations:
            found = backend.search(keyword, Page)
            self.assertEqual(found.count(), expected_hits)
 def test_folders_kvstore_propagates_add_to_subfolders(self):
     """
     A key added to a folder's kvstore propagates to its descendants.

     Structure used: folder "top" > folder "sub" > document. All three
     start with an empty kvstore; after adding one key on "top" each of
     them must report exactly one kvstore entry.
     """
     top = Folder.objects.create(title="top", user=self.user)
     top.save()
     sub = Folder.objects.create(title="sub", parent=top, user=self.user)
     sub.save()
     doc = Document.create_document(
         title="document_in_sub",
         file_name="document_sub.pdf",
         size='1212',
         lang='DEU',
         user=self.user,
         parent_id=sub.id,
         page_count=5,
     )
     doc.save()

     # Ordered from the ancestor folder down to the document.
     chain = (top, sub, doc)

     for node in chain:
         self.assertEqual(0, node.kvstore.count())

     top.kv.add(key="shop")

     # The new key is visible on the folder itself, on the sub folder
     # and on the document inside the sub folder.
     for node in chain:
         self.assertEqual(1, node.kvstore.count())
Beispiel #11
0
    def test_language_is_inherited(self):
        """
        The value of the document's ``lang`` field is inherited by all
        of its related page models.
        """
        doc = Document.create_document(
            title="kyuss.pdf",
            user=self.user,
            lang="ENG",
            file_name="kyuss.pdf",
            size=1222,
            page_count=3
        )
        doc.save()

        self.assertEqual(doc.pages.count(), 3)

        # Every one of the three pages must report the document language.
        page_languages = [page.lang for page in doc.pages.all()]
        self.assertEqual(['ENG', 'ENG', 'ENG'], page_languages)
Beispiel #12
0
 def test_preview(self):
     """
     Requesting the preview of page 1 of andromeda.pdf answers with
     HTTP 200; afterwards the page's image check is evaluated.
     """
     doc = Document.create_document(
         title="andromeda.pdf",
         user=self.testcase_user,
         lang="ENG",
         file_name="andromeda.pdf",
         size=1222,
         page_count=3
     )
     # Copy the sample PDF to the document's endpoint URL.
     sample_pdf = os.path.join(BASE_DIR, "data", "andromeda.pdf")
     copy2doc_url(src_file_path=sample_pdf, doc_url=doc.doc_ep.url())

     preview_url = reverse('core:preview', args=(doc.id, 1, 1))
     response = self.client.post(preview_url)
     self.assertEqual(response.status_code, 200)

     page_ep = PageEp(
         document_ep=doc.doc_ep,
         page_num=1,
         step=Step(1),
         page_count=3
     )
     # NOTE(review): the result of img_exists() is handed on to
     # os.path.exists(); if img_exists() already returns a boolean this
     # assertion does not verify much -- confirm what it returns.
     image_check = page_ep.img_exists()
     self.assertTrue(os.path.exists(image_check))
Beispiel #13
0
    def test_paste_view_access_forbidden(self):
        """
        Pasting pages into a document is forbidden for margaret, who
        has no access to the document owned by testcase_user.
        """
        doc = Document.create_document(
            title="berlin.pdf",
            user=self.testcase_user,
            lang="ENG",
            file_name="berlin.pdf",
            size=1222,
            page_count=3,
        )

        # margaret does not have access to the document
        self.client.login(testcase_user=self.margaret_user)

        post_data = [1, 2]

        ret = self.client.post(reverse('core:api_pages_paste',
                                       args=(doc.id, )),
                               json.dumps(post_data),
                               content_type='application/json',
                               HTTP_X_REQUESTED_WITH='XMLHttpRequest')

        # assertEqual is used instead of the deprecated assertEquals
        # alias, which no longer exists in Python 3.12+.
        self.assertEqual(ret.status_code, HttpResponseForbidden.status_code)
Beispiel #14
0
    def test_delete_document_with_parent(self):
        """
        Document D is child of folder F.
        Deleting document D results in D being gone while folder F is
        still present (and has no children any more).
        """
        folder = Folder.objects.create(user=self.user, title="F")

        doc = Document.create_document(title="andromeda.pdf",
                                       user=self.user,
                                       lang="ENG",
                                       file_name="andromeda.pdf",
                                       size=1222,
                                       page_count=3)
        doc.parent = folder
        doc.save()
        folder.save()
        count = folder.get_children().count()
        self.assertEqual(count, 1,
                         f"Folder {folder.title} has {count} children")
        self.assertEqual(doc.pages.count(), 3)

        # Remember the primary key before deleting: the in-memory
        # instance may not keep it once delete() has run.
        doc_id = doc.id
        doc.delete()

        self.assertEqual(
            folder.get_children().count(),
            0,
        )
        # The parent folder itself must survive the document deletion.
        self.assertTrue(Folder.objects.filter(id=folder.id).exists())

        # Look the document up by its own primary key. The previous
        # lookup by title="D" could never match the document created
        # above (titled "andromeda.pdf"), so it passed regardless of
        # whether the deletion worked.
        with self.assertRaises(Document.DoesNotExist):
            Document.objects.get(id=doc_id)
Beispiel #15
0
    def test_tree_path(self):
        """
        Build the structure
            Folder A > Folder B > Document C
        and verify the ancestors reported for Document C (the document
        itself included), as well as its page count.
        """
        folder_a = Folder.objects.create(
            title="folder_a", user=self.user, parent_id=None
        )
        folder_a.save()

        folder_b = Folder.objects.create(
            title="folder_b", user=self.user, parent_id=folder_a.id
        )
        folder_b.save()

        doc = Document.create_document(
            title="document_c",
            file_name="document_c.pdf",
            size='1212',
            lang='DEU',
            user=self.user,
            parent_id=folder_b.id,
            page_count=5,
        )
        doc.save()

        # [title, id] pairs in the order returned by get_ancestors().
        actual_path = [
            [node.title, node.id]
            for node in doc.get_ancestors(include_self=True)
        ]
        expected_path = [
            ['folder_a', folder_a.id],
            ['folder_b', folder_b.id],
            ['document_c', doc.id],
        ]
        self.assertListEqual(actual_path, expected_path)
        self.assertEqual(doc.pages.count(), 5)
Beispiel #16
0
 def test_document_inherits_kv_from_parent_folder(self):
     """
     Documents newly added to a folder inherit the folder's kv
     metadata.

     The folder gets two keys ("shop" and "total") before the document
     is created inside it; the document must then expose two kv
     entries whose typed keys equal those of the folder.
     """
     top = Folder.objects.create(title="top", user=self.user)
     top.save()

     folder_metadata = [
         {'key': 'shop', 'kv_type': 'text', 'kv_format': ''},
         {'key': 'total', 'kv_type': 'money', 'kv_format': 'dd.cc'},
     ]
     top.kv.update(folder_metadata)

     doc = Document.create_document(
         title="document_c",
         file_name="document_c.pdf",
         size='1212',
         lang='DEU',
         user=self.user,
         parent_id=top.id,
         page_count=5,
     )
     doc.save()

     self.assertEqual(2, doc.kv.count())
     self.assertEqual(
         set(doc.kv.typed_keys()),
         set(top.kv.typed_keys())
     )
    def test_download_hocr_which_does_not_exists(self):
        """
        HOCR might not be available yet, which is a normal case
        (the page OCR task is still in the queue / in progress).

        A missing HOCR file is expected to yield an HTTP 404 response.
        """
        doc = Document.create_document(
            title="berlin.pdf",
            user=self.testcase_user,
            lang="ENG",
            file_name="berlin.pdf",
            size=1222,
            page_count=3
        )
        # The document file itself is available (it is needed for
        # get_pagecount on the server side) ...
        sample_pdf = os.path.join(BASE_DIR, "data", "berlin.pdf")
        default_storage.copy_doc(src=sample_pdf, dst=doc.path.url())

        # ... but no HOCR file was ever produced for it.
        hocr_url = reverse('core:hocr', args=(doc.id, 1, 1))
        response = self.client.get(hocr_url)
        self.assertEqual(response.status_code, 404)
Beispiel #18
0
    def test_basic_av_by_tag(self):
        """
        In advanced search the user can search by tag(s).

        doc1 is tagged "green" and "blue", doc2 only "blue"; searching
        for the tag "green" must return doc1 as the single result.
        """
        doc1 = Document.create_document(
            title="doc1",
            user=self.testcase_user,
            page_count=2,
            file_name="koko.pdf",
            size='1111',
            lang='ENG',
        )
        doc2 = Document.create_document(
            title="doc2",
            user=self.testcase_user,
            page_count=2,
            file_name="kuku.pdf",
            size='1111',
            lang='ENG',
        )
        doc1.tags.add(
            "green",
            "blue",
            tag_kwargs={'user': self.testcase_user}
        )
        doc2.tags.add(
            "blue",
            tag_kwargs={'user': self.testcase_user}
        )

        ret = self.client.get(
            reverse('admin:search'), {'tag': 'green'}
        )
        # assertEqual is used instead of the deprecated assertEquals
        # alias, which no longer exists in Python 3.12+.
        self.assertEqual(
            ret.status_code,
            200
        )
        self.assertEqual(
            len(ret.context['results_docs']),
            1
        )
        doc_ = ret.context['results_docs'][0]

        self.assertEqual(
            doc_.id,
            doc1.id
        )
Beispiel #19
0
    def test_metadata_created_on_existing_folder_document_structure(self):
        """
        Metadata added to a folder reaches documents that already exist
        inside it.

        Folder structure under test:

                Home
                 |
                Folder_A
                 |
                document.pdf

        1. Metadata named "price" (money type, format 'dd,cc') is added
           to Folder_A while document.pdf already exists in it.

        2. Expected:
            a. document.pdf inherits the price metadata
            b. the first page of document.pdf inherits it as well
            c. the inherited metadata keeps the correct type and format
        """
        price_format = "dd,cc"

        folder = Folder.objects.create(title="folder_A", user=self.user)
        doc = Document.create_document(
            title="document.pdf",
            file_name="document.pdf",
            size='1989',
            lang='DEU',
            user=self.user,
            parent_id=folder.id,  # document.pdf is inside folder_A
            page_count=5,
        )
        doc.save()

        # Fetch the folder again now that the document was added to it.
        folder = Folder.objects.get(id=folder.id)
        self.assertEqual(folder.get_children().count(), 1)

        # Attach the metadata to the folder ...
        folder.kv.update([{
            'key': 'price',
            'kv_type': MONEY,
            'kv_format': 'dd,cc'
        }])
        # ... and make sure the folder really has it.
        self.assertEqual(folder.kv.all().count(), 1)

        # The document inherited the metadata with type and format.
        doc_metadata = doc.kv.all()
        self.assertEqual(doc_metadata.count(), 1)
        self.assertEqual(doc_metadata[0].kv_type, MONEY)
        self.assertEqual(doc_metadata[0].kv_format, price_format)

        # The metadata was propagated to the first page, too, again
        # with the correct type and format.
        page_metadata = doc.pages.first().kv.all()
        self.assertEqual(page_metadata.count(), 1,
                         "Metadata was not propagated to document's page")
        self.assertEqual(page_metadata[0].kv_type, MONEY)
        self.assertEqual(page_metadata[0].kv_format, price_format)
Beispiel #20
0
    def import_file(self,
                    file_title=None,
                    inbox_title="Inbox",
                    delete_after_import=True,
                    skip_ocr=False):
        """
        Import the local file at ``self.filepath`` as a new document.

        Steps performed:
            1. determine the page count of the file
            2. get or create the root-level inbox folder of ``self.user``
            3. create a document instance inside that folder
            4. copy the file to the document's storage URL
            5. OCR the document (unless ``skip_ocr`` is set)
            6. remove the source file (if ``delete_after_import`` is set)

        Used with the commands
            ./manage.py local_importer
            ./manage.py imap_importer

        :param file_title: title and file name of the new document;
            defaults to the base name of ``self.filepath``
        :param inbox_title: title of the root-level folder receiving
            the document; the folder is created when missing
        :param delete_after_import: delete ``self.filepath`` once the
            import is done
        :param skip_ocr: do not start OCR for the imported document
        :return: the created document, or ``False`` when the page count
            of the file could not be determined
        """
        logger.debug(f"Importing file {self.filepath}")

        if file_title is None:
            file_title = os.path.basename(self.filepath)

        # Without a page count nothing can be imported: log and give up.
        try:
            page_count = get_pagecount(self.filepath)
        except Exception:
            logger.error(f"Error while getting page count of {self.filepath}.")
            return False

        inbox_folder, _ = Folder.objects.get_or_create(
            title=inbox_title,
            parent=None,
            user=self.user
        )
        document = Document.create_document(
            user=self.user,
            title=file_title,
            size=os.path.getsize(self.filepath),
            lang=self.user_ocr_language,
            file_name=file_title,
            parent_id=inbox_folder.id,
            page_count=page_count
        )

        logger.debug(
            f"Uploading file {self.filepath} to {document.path.url()}"
        )
        default_storage.copy_doc(
            src=self.filepath,
            dst=document.path.url(),
        )

        if not skip_ocr:
            DocumentImporter.ocr_document(
                document=document,
                page_count=page_count,
                lang=self.user_ocr_language,
            )

        # Files imported from a local directory are usually removed
        # afterwards; for email attachments deleting does not apply.
        if delete_after_import:
            os.remove(self.filepath)

        logger.debug("Import complete.")

        return document
Beispiel #21
0
    def test_automate_apply(self):
        """
        Exercise the automate.apply method, once for an automate with
        tags and once for an automate without tags; neither call may
        raise an exception.
        """
        # Automates are applicable only to documents in the inbox folder.
        inbox, _ = Folder.objects.get_or_create(
            title=Folder.INBOX_NAME,
            user=self.user
        )

        # Both documents are created with exactly the same attributes.
        doc_attrs = {
            'title': "document_c",
            'file_name': "document_c.pdf",
            'size': '1212',
            'lang': 'DEU',
            'user': self.user,
            'parent_id': inbox.id,
            'page_count': 5,
        }
        first_doc = Document.create_document(**doc_attrs)
        second_doc = Document.create_document(**doc_attrs)

        # Automate with tags: applying it must not raise.
        tagged_automate = _create_am_any("test", "test", self.user)
        tagged_automate.tags.set(
            "test", "one", tag_kwargs={'user': self.user}
        )
        tagged_automate.apply(document=first_doc, page_num=1, text="test")

        # Automate without tags: applying it must not raise either.
        plain_automate = _create_am_any("test2", "test", self.user)
        plain_automate.apply(document=second_doc, page_num=1, text="test")
    def test_documents_returns_only_docs_user_has_perms_for_1(self):
        """
        Through the API a user can list only the documents he/she has
        READ access to.

        The only existing document belongs to margaret, so the listing
        requested by the test client is expected to be empty.
        """
        # NOTE(review): presumably the test client is logged in as a
        # user other than margaret in setUp -- confirm.
        Document.create_document(
            title="berlin.pdf",
            user=self.margaret_user,
            lang="ENG",
            file_name="berlin.pdf",
            size=1222,
            page_count=3
        )
        listing_url = reverse('core:api_documents')
        response = self.client.get(listing_url)

        self.assertEqual(response.status_code, 200)

        # There is a single document in the system and only margaret
        # (its owner) has access to it.
        listed = json.loads(response.content)
        self.assertEqual(len(listed), 0)
Beispiel #23
0
    def post(self, request, doc_id):
        """
        Paste pages (within document view).

        NO new document is created here. The code for pasting in the
        changelist view (i.e. when a NEW document is created) lives in
        papermerge.core.views.documents.paste_pages.

        Responds with 204 on success and with 403 when the requesting
        user lacks write permission on the document; raises Http404
        when no document with ``doc_id`` exists.
        """
        before = request.POST.get('before', False)
        after = request.POST.get('after', False)

        try:
            target = Document.objects.get(id=doc_id)
        except Document.DoesNotExist:
            raise Http404("Document does not exists")

        # Reject users without write permission right away.
        if not request.user.has_perm(Access.PERM_WRITE, target):
            denied_msg = _(
                "You don't have permissions to paste pages"
                " in this document."
            )
            return Response(
                status=status.HTTP_403_FORBIDDEN,
                data={'msg': denied_msg}
            )

        # NOTE(review): parent_id receives the parent node object
        # (document.parent), not its id -- confirm this is what
        # Document.paste_pages expects.
        Document.paste_pages(
            user=request.user,
            parent_id=target.parent,
            dst_document=target,
            doc_pages=request.pages.all(),
            before=before,
            after=after
        )

        # The pasted pages are removed from the request's page selection.
        request.pages.clear()

        return Response(status=status.HTTP_204_NO_CONTENT)
Beispiel #24
0
    def put(self, request, filename):
        """
        Import the uploaded file as a document titled ``filename``.

        Responds with the serialized document when the import yields a
        Document instance, otherwise with an empty 200 response.
        """
        uploaded = request.data['file']

        imported = Document.import_file(
            uploaded.temporary_file_path(),
            username=request.user.username,
            file_title=filename
        )

        # Anything other than a Document instance is answered with a
        # body-less response.
        if not isinstance(imported, Document):
            return Response(status=200)

        return Response(DocumentSerializer(imported).data)
Beispiel #25
0
def paste_pages(request):
    """
    Paste pages in a changelist view, which means that a new document
    instance is created.

    The JSON request body may carry a ``parent_id``; a truthy value is
    converted to ``int`` before being handed to Document.paste_pages.
    Responds with the JSON message ``{"msg": "OK"}``.
    """
    payload = json.loads(request.body)
    raw_parent = payload.get('parent_id')

    # Falsy values (missing key, None, empty string, 0) pass unchanged.
    parent_id = int(raw_parent) if raw_parent else raw_parent

    Document.paste_pages(
        user=request.user,
        parent_id=parent_id,
        doc_pages=request.pages.all()
    )

    request.pages.clear()

    body = json.dumps({'msg': 'OK'})
    return HttpResponse(body, content_type="application/json")
Beispiel #26
0
    def test_recreate_page_models(self):
        """
        A freshly created document has one page model per page and
        deleting all of them leaves the document with an empty page set.
        """
        doc = Document.create_document(
            title="kyuss.pdf",
            user=self.user,
            lang="ENG",
            file_name="kyuss.pdf",
            size=1222,
            page_count=3
        )
        doc.save()

        self.assertEqual(doc.page_set.count(), 3)

        # Drop every page model of the document.
        doc.page_set.all().delete()

        self.assertEqual(doc.page_set.count(), 0)
Beispiel #27
0
 def test_download(self):
     """
     Requesting the download of a document whose file was copied to
     its endpoint URL answers with HTTP 200.
     """
     doc = Document.create_document(
         title="andromeda.pdf",
         user=self.testcase_user,
         lang="ENG",
         file_name="andromeda.pdf",
         size=1222,
         page_count=3
     )
     sample_pdf = os.path.join(BASE_DIR, "data", "andromeda.pdf")
     copy2doc_url(src_file_path=sample_pdf, doc_url=doc.doc_ep.url())

     download_url = reverse('core:document_download', args=(doc.id, ))
     response = self.client.post(download_url)
     self.assertEqual(response.status_code, 200)
Beispiel #28
0
    def test_node_is_document(self):
        """
        A node created through Document.create_document reports itself
        as a document and not as a folder.
        """
        attrs = {
            'title': "document_node",
            'file_name': "document_node.pdf",
            'size': '1212',
            'lang': 'DEU',
            'user': self.user,
            'page_count': 5,
        }
        node = Document.create_document(**attrs)
        node.save()

        self.assertTrue(node.is_document())
        self.assertFalse(node.is_folder())
Beispiel #29
0
 def test_download(self):
     """
     Requesting the download of a document whose file was copied into
     the default storage answers with HTTP 200.
     """
     doc = Document.create_document(
         title="berlin.pdf",
         user=self.testcase_user,
         lang="ENG",
         file_name="berlin.pdf",
         size=1222,
         page_count=3
     )
     sample_pdf = os.path.join(BASE_DIR, "data", "berlin.pdf")
     default_storage.copy_doc(src=sample_pdf, dst=doc.path.url())

     download_url = reverse('core:document_download', args=(doc.id, ))
     response = self.client.post(download_url)
     self.assertEqual(response.status_code, 200)
    def test_returns_only_doc_user_has_perms_for_1(self):
        """
        Through the API a user can view only a document he/she has READ
        access to.

        The document belongs to margaret, so the test client's request
        for it is expected to be rejected as forbidden.
        """
        # NOTE(review): presumably the test client is logged in as a
        # user other than margaret in setUp -- confirm.
        doc = Document.create_document(
            title="berlin.pdf",
            user=self.margaret_user,
            lang="ENG",
            file_name="berlin.pdf",
            size=1222,
            page_count=3
        )
        document_url = reverse('core:api_document', args=(doc.id, ))
        response = self.client.get(document_url)

        self.assertEqual(
            response.status_code,
            HttpResponseForbidden.status_code
        )