Ejemplos de extract_doc_content en Python

Lenguaje de programación: Python

Namespace/Package Name: cl.scrapers.tasks

Método / Función: extract_doc_content

Ejemplos en hotexamples.com: 10

Python extract_doc_content - 10 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de cl.scrapers.tasks.extract_doc_content extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

    def test_juriscraper_docket_number_extraction(self):
        """Check that extracting the tax court PDF stores its docket number.

        The docket reached through opinion 76 must have no docket number
        before ``extract_doc_content`` runs and the expected one afterwards.
        """
        opinion = Opinion.objects.get(pk=76)
        docket = opinion.cluster.docket
        self.assertEqual(
            None,
            docket.docket_number,
            msg="Docket number should be none.",
        )

        extract_doc_content(pk=76, do_ocr=False)

        # Reload the docket row so the assertion sees what the task wrote.
        docket.refresh_from_db()
        expected_docket_number = "19031-13, 27735-13, 11905-14"
        self.assertEqual(expected_docket_number, docket.docket_number)

Ejemplo n.º 2

Mostrar archivo

    def test_juriscraper_object_creation(self):
        """Check that extracting the tax court PDF adds a citation to the db.

        The cluster of opinion 76 must have no citations before
        ``extract_doc_content`` runs and at least one afterwards.
        """
        opinion = Opinion.objects.get(pk=76)
        cluster = opinion.cluster

        had_citation_before = cluster.citations.exists()
        self.assertFalse(
            had_citation_before,
            msg="Citation should not exist at beginning of test",
        )

        extract_doc_content(pk=opinion.pk, do_ocr=False)

        has_citation_after = cluster.citations.exists()
        self.assertTrue(
            has_citation_after,
            msg="Expected citation was not created in db",
        )

Ejemplo n.º 3

Mostrar archivo

 def test_content_extraction(self):
     """Check text extraction, OCR included, across the stored opinions.

     Each opinion is paired positionally with a lowercase word that must
     appear in its extracted content: in ``html`` when the file extension
     is ``.html`` or ``.wpd``, and in ``plain_text`` otherwise.
     """
     expected_words = [
         'supreme',
         'intelligence',
         'indiana',
         'reagan',
         'indiana',
         'fidelity',
     ]
     # NOTE(review): the pairing depends on the order in which the queryset
     # yields opinions, and zip() stops at the shorter of the two inputs.
     for opinion, expected_word in zip(Opinion.objects.all(), expected_words):
         extension = get_extension(opinion.local_path.file.read())
         extract_doc_content(opinion.pk, do_ocr=True)
         opinion.refresh_from_db()
         stored_as_html = extension in ['.html', '.wpd']
         extracted = opinion.html if stored_as_html else opinion.plain_text
         self.assertIn(expected_word, extracted.lower())

Ejemplo n.º 4

Mostrar archivo

Archivo: tests.py Proyecto: freelawproject/courtlistener

 def test_content_extraction(self):
     """Verify that every stored opinion is extracted to text, with OCR on.

     Opinions and expected words are matched by position. The word is
     looked up in ``html`` for ``.html``/``.wpd`` files and in
     ``plain_text`` for all other extensions, always case-insensitively
     via ``lower()`` on the extracted content.
     """
     html_extensions = ['.html', '.wpd']
     words = ['supreme', 'intelligence', 'indiana', 'reagan', 'indiana',
              'fidelity']
     all_opinions = Opinion.objects.all()
     # NOTE(review): matching relies on the queryset's iteration order;
     # zip() also silently ignores any surplus opinions or words.
     for current, word in zip(all_opinions, words):
         file_ext = get_extension(current.local_path.file.read())
         extract_doc_content(current.pk, do_ocr=True)
         current.refresh_from_db()
         if file_ext not in html_extensions:
             self.assertIn(word, current.plain_text.lower())
         else:
             self.assertIn(word, current.html.lower())

Ejemplo n.º 5

Mostrar archivo

 def test_wpd_content_extraction(self):
     """Check that a wpd file is ingested into the opinion's ``html``."""
     opinion = Opinion.objects.get(pk=5)
     extract_doc_content(opinion.pk, do_ocr=False)
     # Reload so the assertion reads the stored values, not the stale copy.
     opinion.refresh_from_db()
     extracted_html = opinion.html.lower()
     self.assertIn("greene", extracted_html)

Ejemplo n.º 6

Mostrar archivo

 def test_html_content_extraction(self):
     """Check that an html file is ingested into the opinion's ``html``."""
     opinion = Opinion.objects.get(pk=4)
     extract_doc_content(opinion.pk, do_ocr=False)
     # Reload so the assertion reads the stored values, not the stale copy.
     opinion.refresh_from_db()
     extracted_html = opinion.html.lower()
     self.assertIn("reagan", extracted_html)

Ejemplo n.º 7

Mostrar archivo

 def test_text_based_pdf(self):
     """Check that a text based pdf is ingested without OCR."""
     pdf_opinion = Opinion.objects.get(pk=3)
     extract_doc_content(pdf_opinion.pk, do_ocr=False)
     # Reload so the assertion reads the stored values, not the stale copy.
     pdf_opinion.refresh_from_db()
     extracted_text = pdf_opinion.plain_text.lower()
     self.assertIn("tarrant", extracted_text)

Ejemplo n.º 8

Mostrar archivo

 def test_image_based_pdf(self):
     """Check that an image based pdf is ingested when OCR is enabled."""
     scanned_opinion = Opinion.objects.get(pk=2)
     extract_doc_content(scanned_opinion.pk, do_ocr=True)
     # Reload so the assertion reads the stored values, not the stale copy.
     scanned_opinion.refresh_from_db()
     extracted_text = scanned_opinion.plain_text.lower()
     self.assertIn("intelligence", extracted_text)

Ejemplo n.º 9

Mostrar archivo

 def test_doc_content_extraction(self):
     """Check that a doc file is ingested into ``plain_text``."""
     doc_opinion = Opinion.objects.get(pk=1)
     extract_doc_content(doc_opinion.pk, do_ocr=False)
     # Reload so the assertion reads the stored values, not the stale copy.
     doc_opinion.refresh_from_db()
     extracted_text = doc_opinion.plain_text.lower()
     self.assertIn("indiana", extracted_text)

Ejemplo n.º 10

Mostrar archivo

 def test_txt_content_extraction(self):
     """Check that a txt file is ingested into ``plain_text``."""
     opinion = Opinion.objects.get(pk=6)
     extract_doc_content(opinion.pk, do_ocr=False)
     # Reload so the assertion reads the stored values, not the stale copy.
     opinion.refresh_from_db()
     extracted_text = opinion.plain_text.lower()
     self.assertIn("ideal", extracted_text)