コード例 #1
0
class Engine2ConversionTest(unittest.TestCase):
    """Exercise convert() on the resources behind image_handle/html_handle."""

    def setUp(self):
        # A fresh SourceManager per test; both resources are followed
        # through it and it is cleared again in tearDown().
        manager = SourceManager()
        self._sm = manager

        self._ir = image_handle.follow(manager)
        self._hr = html_handle.follow(manager)

    def tearDown(self):
        self._sm.clear()

    def test_last_modified(self):
        # The LastModified conversion must produce some value.
        converted = convert(self._ir, OutputType.LastModified)
        self.assertIsNotNone(converted.value)

    def test_image_dimensions(self):
        # The ImageDimensions conversion must report exactly (896, 896).
        converted = convert(self._ir, OutputType.ImageDimensions)
        self.assertEqual(converted.value, (896, 896))

    def test_fallback(self):
        # The Fallback conversion must compare equal to True.
        converted = convert(self._ir, OutputType.Fallback)
        self.assertEqual(converted.value, True)

    def test_dummy(self):
        # Asking for the Dummy output type is expected to raise KeyError.
        with self.assertRaises(KeyError):
            convert(self._ir, OutputType.Dummy)

    def test_html(self):
        # The Text conversion of the HTML resource must contain the marker.
        text = convert(self._hr, OutputType.Text).value
        self.assertIn("This is only a test.", text)
コード例 #2
0
def handle_message(body, channel):
    """Forward one raw message to the stage selected by *channel*.

    This is a generator: it yields whatever the chosen stage's
    message_received_raw() yields. A channel that matches none of the
    known names yields nothing. Stages that are passed a SourceManager
    get a fresh one, scoped to this call by a ``with`` block.
    """
    if channel == "os2ds_scan_specs":
        with SourceManager() as sm:
            yield from explorer.message_received_raw(
                body, channel, sm,
                "os2ds_conversions", "os2ds_problems", None)
        return

    if channel == "os2ds_conversions":
        with SourceManager() as sm:
            yield from processor.message_received_raw(
                body, channel, sm,
                "os2ds_representations", "os2ds_scan_specs",
                ["os2ds_problems"])
        return

    if channel == "os2ds_representations":
        # The matcher is the only source-free stage besides the exporter.
        yield from matcher.message_received_raw(
            body, channel,
            ["os2ds_matches"], "os2ds_handles", "os2ds_conversions")
        return

    if channel == "os2ds_handles":
        with SourceManager() as sm:
            yield from tagger.message_received_raw(
                body, channel, sm, "os2ds_metadata", "os2ds_problems")
        return

    if channel in ("os2ds_matches", "os2ds_metadata", "os2ds_problems"):
        yield from exporter.message_received_raw(
            body, channel, False, "os2ds_results")
コード例 #3
0
 def test_exploration_index(self):
     # Count every handle that indexed_mapped_site yields; 6 are expected.
     with SourceManager() as sm:
         count = sum(1 for _ in indexed_mapped_site.handles(sm))
     self.assertEqual(
         count, 6, "embedded site with sitemap index should have 6 handles")
コード例 #4
0
 def test_exploration_data_sitemap(self):
     # Count every handle that embedded_mapped_site yields; 4 are expected.
     with SourceManager() as sm:
         count = sum(1 for _ in embedded_mapped_site.handles(sm))
     self.assertEqual(
         count, 4, "embedded site with data: sitemap should have 4 handles")
コード例 #5
0
 def test_generator_exception(self):
     # Opening a BrokenSource must raise ValueError, and afterwards the
     # source must not be reported as contained in the SourceManager.
     broken = BrokenSource()
     with SourceManager() as sm:
         with self.assertRaises(ValueError):
             sm.open(broken)
         still_open = broken in sm
         self.assertFalse(still_open,
                          "_generate_state failed, but Source still open")
コード例 #6
0
 def test_odt_extraction(self):
     # Run the responsible-party guess on the proxied handle, then check
     # both the extracted modifier and that "follow" was never accessed.
     proxy = self.handle_proxy
     with SourceManager() as sm:
         party = guess_responsible_party(proxy, sm)

     modifier = party["od-modifier"]
     self.assertEqual(modifier, "Alexander John Faithfull",
                      "metadata extraction failed")

     follow_accesses = self.handle_proxy.get_attr_access_count("follow")
     self.assertEqual(follow_accesses, 0,
                      "metadata extraction from synthetic file attempted")
コード例 #7
0
 def test_generator_exception2(self):
     # Opening the same BrokenSource twice must raise ValueError both times.
     broken = BrokenSource()
     with SourceManager() as sm:
         for _ in range(2):
             with self.assertRaises(ValueError):
                 sm.open(broken)
コード例 #8
0
    def test_missing_headers(self):
        # Take the first handle magenta yields, strip two entries from the
        # header mapping of its resource, and check the fallback values for
        # the MIME type and the last-modified timestamp.
        with SourceManager() as sm:
            # closing() makes sure the handle generator is shut down as soon
            # as the first handle has been taken from it.
            with contextlib.closing(magenta.handles(sm)) as handles:
                first_handle = next(handles)
            resource = first_handle.follow(sm)

            # Sampled before get_last_modified() is called below, so a fresh
            # timestamp can never be older than this.
            reference_time = datetime.now()

            # It is not documented anywhere that WebResource.get_header()
            # returns a live dictionary, so don't depend on this behaviour
            # NOTE(review): the call below is unpack_header(), not
            # get_header() -- confirm which one the remark refers to.
            header = resource.unpack_header()
            unwanted = ("content-type", OutputType.LastModified)
            for name in unwanted:
                if name in header:
                    del header[name]

            mime_type = resource.compute_type()
            self.assertEqual(
                mime_type, "application/octet-stream",
                "{0}: unexpected backup MIME type".format(first_handle))

            last_modified = resource.get_last_modified().value
            self.assertGreaterEqual(
                last_modified, reference_time,
                "{0}: Last-Modified not fresh".format(first_handle))
コード例 #9
0
def test_exploration():
    """Print the relative path of every handle of `site`, then the total.

    Nothing is asserted; the final line only reports how many handles were
    seen next to the expected number.
    """
    count = 0
    with SourceManager() as sm:
        # enumerate(start=1) leaves `count` at the number of handles seen,
        # and at 0 when there are none.
        for count, handle in enumerate(site.handles(sm), start=1):
            print(handle.relative_path)
    print(f"Embedded site should have 3 handles. Have {count}")
コード例 #10
0
 def test_corrupted_ocr(self):
     # Every file under "corrupted" must convert to None for the Text
     # output type.
     corrupted_dir = os.path.join(test_data_path, "corrupted")
     source = FilesystemSource(corrupted_dir)
     with SourceManager() as sm:
         for handle in source.handles(sm):
             converted = convert(handle.follow(sm), OutputType.Text)
             self.assertEqual(converted, None,
                              "{0}: error handling failed".format(handle))
コード例 #11
0
 def test_ocr_conversions(self):
     # Every file under "good" must convert to expected_result for the Text
     # output type.
     good_dir = os.path.join(test_data_path, "good")
     source = FilesystemSource(good_dir)
     with SourceManager() as sm:
         for handle in source.handles(sm):
             text = convert(handle.follow(sm), OutputType.Text).value
             self.assertEqual(
                 text, expected_result,
                 "{0}: content failed".format(handle))
コード例 #12
0
 def test_corrupted_doc(self):
     # A Source built from the truncated .doc file must yield no handles.
     doc_path = os.path.join(
         test_data_path, "msoffice/corrupted/test.trunc.doc")
     corrupted_doc = Source.from_handle(
         FilesystemHandle.make_handle(doc_path))
     with SourceManager() as sm:
         found = list(corrupted_doc.handles(sm))
         self.assertEqual(
             found, [],
             "unrecognised CDFV2 document should be empty and wasn't")
コード例 #13
0
    def test_derived_source(self):
        # A Source derived from a handle must expose a non-None handle.
        # The SourceManager is entered but never passed to anything here.
        with SourceManager():
            archive = FilesystemHandle(
                FilesystemSource(test_data_path),
                "data/engine2/zip-here/test-vector.zip")

            derived = Source.from_handle(archive)
            derived_handle = derived.handle
            self.assertIsNotNone(
                derived_handle,
                "{0}: derived source has no handle".format(derived))
コード例 #14
0
 def test_basic(self):
     # Opening the same Tracker twice must leave its count at 1 while the
     # SourceManager is active, and at 0 once the with block has exited.
     tracker = Tracker()
     with SourceManager() as sm:
         for _ in range(2):
             sm.open(tracker)
         self.assertEqual(tracker.count, 1,
                          "SourceManager opened the same object twice")
     self.assertEqual(tracker.count, 0,
                      "SourceManager didn't close the object")
コード例 #15
0
 def test_exploration(self):
     # Walk magenta's handles until five have been counted, then stop.
     wanted = 5
     count = 0
     with SourceManager() as sm:
         for _ in magenta.handles(sm):
             # The check comes first, so one more handle is fetched before
             # the loop is abandoned.
             if count == wanted:
                 break
             count += 1
     self.assertEqual(
         count, wanted, "magenta.dk should have more than 5 pages")
コード例 #16
0
 def test_alternative_trimming(self):
     # The MailSource for alternative.eml must yield exactly one handle.
     eml_path = os.path.join(test_data_path, "alternative.eml")
     alternative_source = MailSource(FilesystemHandle.make_handle(eml_path))
     with SourceManager() as sm:
         parts = list(alternative_source.handles(sm))
         self.assertEqual(len(parts), 1, "text/plain trimming failed")
コード例 #17
0
 def run_rule(self, source):
     """Apply self.rule to *source* and assert the single expected match.

     The results of try_apply() are collected into a list and compared with
     one fixed match dictionary.
     """
     expected = [{
         "offset": 0,
         "match": "1310XXXXXX",
         "context": "XXXXXX-XXXX",
         "context_offset": 0
     }]
     with SourceManager() as sm:
         matches = list(try_apply(sm, source, self.rule))
         self.assertEqual(matches, expected)
コード例 #18
0
    def test_broken_page_handling(self):
        # Read broken.html through its WebHandle and check that
        # make_outlinks() finds exactly one link in the decoded content.
        page = WebHandle(WebSource("http://localhost:64346/"), "broken.html")
        with SourceManager() as sm, page.follow(sm).make_stream() as stream:
            content = stream.read().decode()

        outlinks = list(
            make_outlinks(content, "http://localhost:64346/broken.html"))
        self.assertEqual(
            outlinks,
            ["http://localhost:64346/kontakt.html"],
            "expected one link to be found in broken document")
コード例 #19
0
def test_exploration_sitemap():
    """Print every handle path of `mapped_site`, then the total.

    For the handle whose relative path is "hemmeligheder2.html" the year,
    month and day of its last-modified value are printed as well. Nothing
    is asserted.
    """
    count = 0
    with SourceManager() as sm:
        for count, handle in enumerate(mapped_site.handles(sm), start=1):
            print(handle.relative_path)
            if handle.relative_path != "hemmeligheder2.html":
                continue
            lm = handle.follow(sm).get_last_modified().value
            print('modification date', lm.year, lm.month, lm.day)
    print(f"embedded site with sitemap should have 5 handles. Have {count}")
コード例 #20
0
 def test_followable(self):
     # Each example handle is followed inside its own subTest.
     with SourceManager() as sm:
         for handle in example_handles:
             with self.subTest(handle):
                 try:
                     handle.follow(sm)
                 except Exception as ex:
                     # Only a TypeError is allowed to fail the subtest;
                     # every other exception is deliberately ignored.
                     if isinstance(ex, TypeError):
                         raise
コード例 #21
0
 def test_smbc_url(self):
     # Probe the share first: if fetching one handle fails for any reason,
     # the test is skipped instead of failed.
     url = "smbc://*****:*****@samba/general"
     with SourceManager() as sm:
         source = Source.from_url(url)
         try:
             with contextlib.closing(source.handles(sm)) as probe:
                 next(probe)
         except Exception:
             self.skipTest("test Samba server not up (not running in CI?)")
         self.process(source, sm)
コード例 #22
0
 def test_doc_mime(self):
     # Both the guessed type of the handle and the computed type of its
     # resource must be the .doc MIME type.
     expected = "application/msword"

     guessed = doc_handle.guess_type()
     self.assertEqual(guessed, expected, ".doc MIME guess is incorrect")

     with SourceManager() as sm:
         computed = doc_handle.follow(sm).compute_type()
         self.assertEqual(
             computed, expected, ".doc MIME computation is incorrect")
コード例 #23
0
 def test_libreoffice_size(self):
     # Every handle of the derived source whose name ends in ".html" must
     # report a size below 1048576.
     ods_path = os.path.join(
         test_data_path, "libreoffice/html-explosion.ods")
     large_doc = Source.from_handle(FilesystemHandle.make_handle(ods_path))
     with SourceManager() as sm:
         for handle in large_doc.handles(sm):
             if not handle.name.endswith(".html"):
                 continue
             size = handle.follow(sm).get_size().value
             self.assertLess(size, 1048576,
                             "LibreOffice HTML output was too big")
コード例 #24
0
 def test_encrypted_zip(self):
     # Check that all the ZipHandles we get out of an encrypted Zip file
     # actually work. (It's fine if we don't get any, but the ones we *do*
     # need to work!)
     container = FilesystemHandle(
         FilesystemSource(test_data_path), "encrypted-test-vector.zip")
     encrypted_file = ZipSource(container)
     with SourceManager() as sm:
         for member in encrypted_file.handles(sm):
             # No assertion: the call only has to complete without raising.
             resource = member.follow(sm)
             resource.compute_type()
コード例 #25
0
 def test_docx_mime(self):
     # Both the guessed type of the handle and the computed type of its
     # resource must be the .docx MIME type.
     expected = ("application/vnd.openxmlformats-officedocument"
                 ".wordprocessingml.document")

     guessed = docx_handle.guess_type()
     self.assertEqual(guessed, expected, ".docx MIME guess is incorrect")

     with SourceManager() as sm:
         computed = docx_handle.follow(sm).compute_type()
         self.assertEqual(
             computed, expected, ".docx MIME computation is incorrect")
コード例 #26
0
 def test_size_computation(self):
     # Convert every file under test_data_path to ImageDimensions and
     # compare the result with expected_size.
     fs = FilesystemSource(test_data_path)
     with SourceManager() as sm:
         for h in fs.handles(sm):
             resource = h.follow(sm)
             size = convert(resource, OutputType.ImageDimensions)
             if not size:
                 # A falsy conversion is only tolerated for paths containing
                 # "rgba32"; any other file falls through to the assertion
                 # below and fails there.
                 if "rgba32" in h.relative_path:
                     self.skipTest("Pillow RGBA bug detected -- skipping")
             else:
                 size = size.value
             # The message template used to be passed without .format(), so
             # failures showed the literal "{0}" instead of the handle.
             self.assertEqual(
                 size, expected_size, "{0}: size failed".format(h))
コード例 #27
0
 def test_sitemap_lm(self):
     # Find "hemmeligheder2.html" among the handles and check the date part
     # of its last-modified value; fail if the file never shows up.
     with SourceManager() as sm:
         for handle in indexed_mapped_site.handles(sm):
             if handle.relative_path != "hemmeligheder2.html":
                 continue
             lm = handle.follow(sm).get_last_modified().value
             self.assertEqual(
                 (lm.year, lm.month, lm.day), (2011, 12, 1),
                 "secret file's modification date is too late")
             break
         else:
             # Reached only when the loop ended without hitting `break`.
             self.fail("secret file missing")
コード例 #28
0
 def handle(self, **kwargs):
     """Print each source named in kwargs['urls'] via url_explorer.

     Reads the 'urls', 'guess' and 'summarise' entries of *kwargs*. A URL
     that raises UnknownSchemeError is skipped without any output.
     """
     urls = kwargs['urls']
     guess = kwargs['guess']
     summarise = kwargs['summarise']
     with SourceManager() as sm:
         for url in urls:
             try:
                 source = Source.from_url(url)
                 url_explorer.print_source(
                     sm, source, guess=guess, summarise=summarise)
             except UnknownSchemeError:
                 # Deliberate best-effort: move on to the next URL.
                 continue
コード例 #29
0
 def verify(self) -> bool:
     """Try to open every account produced by self.generate_sources().

     Returns False as soon as opening an account raises
     ErrorNonExistentMailbox, and True once all accounts have been tried
     (including when there are none). Each account gets its own
     SourceManager.
     """
     for account in self.generate_sources():
         with SourceManager() as sm:
             try:
                 opened = sm.open(account)
                 if opened.msg_folder_root:
                     message = (
                         "OS2datascanner has access to mailbox {0}".format(
                             account.address))
                     print(message)
             except ErrorNonExistentMailbox:
                 print("Mailbox {0} does not exits".format(account.address))
                 return False
     return True
コード例 #30
0
 def test_eml_files(self):
     # Every file under test_data_path must convert to a MailSource, and
     # every handle of that MailSource must be a MailPartHandle.
     fs = FilesystemSource(test_data_path)
     with SourceManager() as sm:
         for file_handle in fs.handles(sm):
             mail_source = Source.from_handle(file_handle)
             self.assertIsInstance(
                 mail_source,
                 MailSource,
                 "conversion of {0} to MailSource failed".format(
                     file_handle))
             for part in mail_source.handles(sm):
                 self.assertIsInstance(part, MailPartHandle)