Example #1
0
 def test_corrupted_ocr(self):
     # Each file found under the "corrupted" test-data folder is converted
     # to text; the conversion is expected to give None for every one.
     corrupted_dir = os.path.join(test_data_path, "corrupted")
     source = FilesystemSource(corrupted_dir)
     with SourceManager() as sm:
         for handle in source.handles(sm):
             text = convert(handle.follow(sm), OutputType.Text)
             failure_note = "{0}: error handling failed".format(handle)
             self.assertEqual(text, None, failure_note)
Example #2
0
 def test_ocr_conversions(self):
     # Each file found under the "good" test-data folder is converted to
     # text, and the converted value must equal expected_result.
     good_dir = os.path.join(test_data_path, "good")
     source = FilesystemSource(good_dir)
     with SourceManager() as sm:
         for handle in source.handles(sm):
             representation = convert(handle.follow(sm), OutputType.Text)
             actual = representation.value
             failure_note = "{0}: content failed".format(handle)
             self.assertEqual(actual, expected_result, failure_note)
Example #3
0
def try_apply(sm, source):
    """Return the first text value found while walking *source* depth-first.

    Every handle of *source* is examined in order. A handle that can be
    reinterpreted as a nested source (``Source.from_handle`` returns
    something truthy) is searched recursively; any other handle is followed
    and converted to ``OutputType.Text``.

    Args:
        sm: the source manager passed to ``handles``, ``from_handle`` and
            ``follow``.
        source: the source whose handles are explored.

    Returns:
        The ``value`` of the first text representation found, or ``None``
        when no handle produced one.
    """
    for handle in source.handles(sm):
        derived = Source.from_handle(handle, sm)
        if derived:
            # The recursive result used to be thrown away, so text living
            # inside a nested source could never reach the caller.
            found = try_apply(sm, derived)
            if found is not None:
                return found
        else:
            resource = handle.follow(sm)
            representation = convert(resource, OutputType.Text)
            # A falsy representation means there is nothing to read a value
            # from; move on to the next handle instead of failing on .value.
            if representation:
                return representation.value
    return None
Example #4
0
def try_apply(sm, source, rule):
    """Yield everything *rule* matches in the content reachable from *source*.

    Handles that can be reinterpreted as a nested source are explored
    recursively. Every other handle is followed and converted to the
    representation named by ``rule.operates_on``; when that conversion gives
    a truthy result, the results of ``rule.match`` on its value are yielded.
    """
    for handle in source.handles(sm):
        nested = Source.from_handle(handle, sm)
        if nested:
            # Container-like handle: delegate to the nested source.
            yield from try_apply(sm, nested, rule)
            continue

        representation = convert(handle.follow(sm), rule.operates_on)
        if not representation:
            # Nothing usable came out of the conversion for this handle.
            continue
        yield from rule.match(representation.value)
Example #5
0
 def test_size_computation(self):
     # Every file under test_data_path is converted to
     # OutputType.ImageDimensions, and the result is compared against
     # expected_size.
     fs = FilesystemSource(test_data_path)
     with SourceManager() as sm:
         for h in fs.handles(sm):
             resource = h.follow(sm)
             size = convert(resource, OutputType.ImageDimensions)
             if not size:
                 # A failed conversion is tolerated only for the "rgba32"
                 # files; any other falsy result falls through and fails
                 # the assertion below.
                 if "rgba32" in h.relative_path:
                     self.skipTest("Pillow RGBA bug detected -- skipping")
             else:
                 size = size.value
             # The message template was previously passed unformatted, so a
             # failure reported a literal "{0}" instead of the handle.
             self.assertEqual(
                 size, expected_size, "{0}: size failed".format(h))
Example #6
0
    while True:
        # Take the first handle the current source yields and follow it to
        # get the resource behind it.
        h_generator = s.handles(sm)
        h = next(h_generator)
        r = h.follow(sm)

        #if h.guess_type() == "text/plain":
        print(f"handle\t{h}")
        print(f"resource\t{r}")

        # Dump whatever the resource's stream contains.
        print("raw content:")
        with r.make_stream() as fp:
            print("\t\t{0}".format(fp.read()))

        # should succeed for text -> text conversion
        try:
            rep = convert(r, OutputType.Text)
            # NOTE(review): "Conveted" in the message below looks like a typo
            # for "Converted"; left untouched because it is runtime output.
            print(f"Conveted\t{rep.value}")
            break
        except KeyError as e:
            # let's try to reinterpret the handle as a new Source; the next
            # iteration then starts again from that source's first handle
            s = Source.from_handle(h)

# sz = Source.from_handle(h)
# hz = next(sz.handles(sm))
# rz = hz.follow(sm)
# with rz.make_stream() as fp:
#     print("\t\t{0}".format(fp.read()))

## Let's try it manually
hd = DataHandle(DataSource(content=b64encode(gzip_content),
                           mime="text/plain",
Example #7
0
        else:
            break
    return h

# Keep a reference to the registry's converter table (only used by the
# commented-out pprint below).
converters = registry.__converters
# pprint(f"converters {converters}")

# Describe a single page, "side.html", on the local web server.
sm = SourceManager()
site = WebSource("http://localhost:64346/")
page = WebHandle(source=site, path="side.html")

# Follow the handle to its resource, check it, and print its computed type.
resource = page.follow(sm)
resource.check()
mime_type = resource.compute_type()
print(f"mime_type of resource {mime_type}")
# Convert the page to its Links representation and keep the value.
link_list = convert(resource, OutputType.Links).value


# Run the link-following rule over the links and wrap the matches (or an
# empty list when there are none) in a MatchFragment message.
rule = LinksFollowRule(sensitivity=Sensitivity.INFORMATION)
matches = list(rule.match(link_list))
msg = messages.MatchFragment(rule, matches or [])


# Pull just the first two handles from the site; contextlib.closing closes
# the handles iterator when the block exits.
with contextlib.closing(site.handles(sm)) as handles:
    first_thing = next(handles)
    second_thing = next(handles)


# Report the base referrer that base_referrer() finds for the second handle.
h = base_referrer(second_thing)
print(f"{second_thing.presentation} have {h.presentation} as base referrer")