コード例 #1
0
    def process(self, source, sm, depth=0):
        """Recursively explore *source* and verify the leaf resources found.

        Every handle yielded by ``source.handles(sm)`` is printed, indented
        by *depth*, and then treated in one of three ways:

        * if ``Source.from_handle`` produces a source for it (tried both
          with and without *sm*; the result obtained with *sm* wins), that
          source is processed recursively;
        * if its name is ``"url"``, its content is decoded as UTF-8 and the
          source returned by ``Source.from_url`` is processed recursively;
        * if its name is ``"test-vector"``, or *source* is a ``DataSource``,
          the followed resource is checked: its stream and its on-disk
          path must give identical bytes, both must decode to
          ``self.correct_content``, and the reported size and last
          modification date must be well-formed ``SingleResult`` objects.

        Handles matching none of these cases are only printed.

        At ``depth == 0`` the source itself must have no backing handle.
        """
        if depth == 0:
            self.assertIsNone(source.handle,
                              "{0}: unexpected backing handle".format(source))
        for handle in source.handles(sm):
            print("{0}{1}".format("  " * depth, handle))
            guessed = Source.from_handle(handle)
            computed = Source.from_handle(handle, sm)

            if computed or guessed:
                self.process(computed or guessed, sm, depth + 1)

            elif handle.name == "url":
                with handle.follow(sm).make_stream() as fp:
                    url = fp.read().decode("utf-8")
                self.process(Source.from_url(url), sm, depth + 1)

            elif handle.name == "test-vector" or isinstance(
                    source, DataSource):
                r = handle.follow(sm)

                self.assertTrue(r.check(), "check() method failed")
                reported_size = r.get_size()
                last_modified = r.get_last_modified()

                # Read the same resource twice, once as a stream and once
                # through a filesystem path, so the two can be compared.
                with r.make_stream() as fp:
                    stream_raw = fp.read()
                    stream_size = len(stream_raw)
                    stream_content = stream_raw.decode("utf-8")
                with r.make_path() as p:
                    with open(p, "rb") as fp:
                        file_raw = fp.read()
                        file_size = len(file_raw)
                        file_content = file_raw.decode("utf-8")

                self.assertIsInstance(last_modified, SingleResult,
                                      ("{0}: last modification date is not a"
                                       " SingleResult").format(handle))
                # The leading space in the continuation string keeps "a" and
                # "datetime.datetime" from running together in the message.
                self.assertIsInstance(
                    last_modified.value, datetime,
                    ("{0}: last modification date value is not a"
                     " datetime.datetime").format(handle))

                self.assertIsInstance(reported_size, SingleResult,
                                      ("{0}: resource length is not a"
                                       " SingleResult").format(handle))
                self.assertEqual(
                    stream_size, reported_size.value,
                    "{0}: model stream length invalid".format(handle))
                # Distinct message so a failure says which of the two size
                # checks (stream or file) went wrong.
                self.assertEqual(
                    file_size, reported_size.value,
                    "{0}: model file length invalid".format(handle))
                self.assertEqual(
                    file_raw, stream_raw,
                    "{0}: model file and stream not equal".format(handle))
                self.assertEqual(stream_content, self.correct_content,
                                 "{0}: model stream invalid".format(handle))
                self.assertEqual(file_content, self.correct_content,
                                 "{0}: model file invalid".format(handle))
コード例 #2
0
    def test_derived_source(self):
        """A source built from a filesystem handle must expose a handle."""
        with SourceManager() as sm:
            zip_handle = FilesystemHandle(
                FilesystemSource(test_data_path),
                "data/engine2/zip-here/test-vector.zip")
            derived = Source.from_handle(zip_handle)

            backing = derived.handle
            self.assertIsNotNone(
                backing,
                "{0}: derived source has no handle".format(derived))
コード例 #3
0
 def test_corrupted_doc(self):
     """Exploring the truncated .doc test file must yield no handles."""
     doc_path = os.path.join(
         test_data_path, "msoffice/corrupted/test.trunc.doc")
     source = Source.from_handle(FilesystemHandle.make_handle(doc_path))
     with SourceManager() as sm:
         found = list(source.handles(sm))
         self.assertEqual(
             found, [],
             "unrecognised CDFV2 document should be empty and wasn't")
コード例 #4
0
ファイル: timing_cpr_rule.py プロジェクト: pawsen/magenta
def try_apply(sm, source):
    """Return the text value of the first leaf handle found under *source*.

    The handles of *source* are visited in order. A handle that
    ``Source.from_handle`` can turn into a nested source is searched
    recursively; any other handle is followed, converted with
    ``convert(resource, OutputType.Text)``, and the ``value`` of that
    representation is returned immediately.

    Returns ``None`` when *source* has no handles, or when every nested
    source produced ``None``.
    """
    for handle in source.handles(sm):
        derived = Source.from_handle(handle, sm)
        if derived:
            # Hand the nested result back to the caller; without this the
            # content found inside a derived source would be thrown away.
            found = try_apply(sm, derived)
            if found is not None:
                return found
        else:
            resource = handle.follow(sm)
            representation = convert(resource, OutputType.Text)
            return representation.value
    return None
コード例 #5
0
def try_apply(sm, source, rule):
    """Yield the matches of *rule* over everything reachable from *source*.

    Handles that ``Source.from_handle`` can turn into a nested source are
    explored recursively. Every other handle is followed and converted to
    ``rule.operates_on``; when the conversion result is truthy, whatever
    ``rule.match`` yields for its ``value`` is passed on.
    """
    for handle in source.handles(sm):
        nested = Source.from_handle(handle, sm)
        if nested:
            yield from try_apply(sm, nested, rule)
            continue

        converted = convert(handle.follow(sm), rule.operates_on)
        if not converted:
            continue
        yield from rule.match(converted.value)
コード例 #6
0
 def test_libreoffice_size(self):
     """Each handle named ``*.html`` must report a size below 1048576."""
     ods_path = os.path.join(
         test_data_path, "libreoffice/html-explosion.ods")
     source = Source.from_handle(FilesystemHandle.make_handle(ods_path))
     with SourceManager() as sm:
         for handle in source.handles(sm):
             if not handle.name.endswith(".html"):
                 continue
             size = handle.follow(sm).get_size()
             self.assertLess(size.value, 1048576,
                             "LibreOffice HTML output was too big")
コード例 #7
0
 def test_eml_files(self):
     """Every test-data handle must become a MailSource of mail parts."""
     root = FilesystemSource(test_data_path)
     with SourceManager() as sm:
         for file_handle in root.handles(sm):
             mail_source = Source.from_handle(file_handle)
             failure = "conversion of {0} to MailSource failed".format(
                 file_handle)
             self.assertIsInstance(mail_source, MailSource, failure)
             # Each handle exposed by the mail source must be a mail part.
             for part in mail_source.handles(sm):
                 self.assertIsInstance(part, MailPartHandle)
コード例 #8
0
ファイル: timing_cpr_rule.py プロジェクト: pawsen/magenta
def get_content_from_handle(handle):
    """Return what ``try_apply`` extracts from the source behind *handle*.

    A ``SourceManager`` is opened for the duration of the call, *handle* is
    turned into a source with ``Source.from_handle``, and that source is
    passed to ``try_apply``.

    Raises:
        AssertionError: if ``Source.from_handle`` returns ``None``.
    """
    with SourceManager() as sm:
        source = Source.from_handle(handle, sm)
        # Raised explicitly (rather than with an ``assert`` statement) so the
        # check still runs under ``python -O``; the exception type is kept.
        if source is None:
            raise AssertionError(
                f"{handle} could not be made into a Source")
        return try_apply(sm, source)
コード例 #9
0
        #if h.guess_type() == "text/plain":
        print(f"handle\t{h}")
        print(f"resource\t{r}")

        print("raw content:")
        with r.make_stream() as fp:
            print("\t\t{0}".format(fp.read()))

        # should succeed for text -> text conversion
        try:
            rep = convert(r, OutputType.Text)
            print(f"Conveted\t{rep.value}")
            break
        except KeyError as e:
            # let's try to reinterpret the handle as a new Source
            s = Source.from_handle(h)

# sz = Source.from_handle(h)
# hz = next(sz.handles(sm))
# rz = hz.follow(sm)
# with rz.make_stream() as fp:
#     print("\t\t{0}".format(fp.read()))

## Let's try it manually
hd = DataHandle(DataSource(content=b64encode(gzip_content),
                           mime="text/plain",
                           name="sitemap.xml.gz"),
                relpath="sitemap.xml.gz")
rd = hd.follow(sm)
print("data resource b64encoded gzip -  ")
with rd.make_stream() as fp:
コード例 #10
0
 def run_rule_on_handle(self, handle):
     """Build a source from *handle* and pass it to ``self.run_rule``.

     Fails the test if ``Source.from_handle`` returns ``None``.
     """
     with SourceManager() as sm:
         source = Source.from_handle(handle, sm)
         failure = "{0} couldn't be made into a Source".format(handle)
         self.assertIsNotNone(source, failure)
         self.run_rule(source, sm)