def test_incomplete_json(self):
    """Check that JSON objects with no top-level "type" key are rejected.

    Both payloads below omit the top-level "type" field (the Handle payload
    only carries a "type" inside its nested "source"), and deserialising
    either one is expected to raise DeserialisationError.
    """
    typeless_source = {"hostname": "gopher.invalid"}
    typeless_handle = {
        "source": {
            "type": "gopher",
            "hostname": "gopher.invalid",
        },
        "path": "/Reference",
    }

    self.assertRaises(
        DeserialisationError,
        Source.from_json_object,
        typeless_source,
    )
    self.assertRaises(
        DeserialisationError,
        Handle.from_json_object,
        typeless_handle,
    )
def test_invalid_json(self):
    """Check that JSON objects of type "gopher" are rejected.

    Both payloads declare "type": "gopher" at the top level, and
    deserialising either one is expected to raise UnknownSchemeError
    (presumably because no "gopher" scheme is registered -- confirm against
    the model's type registry).
    """
    gopher_source = {
        "type": "gopher",
        "hostname": "gopher.invalid",
    }
    gopher_handle = {
        "type": "gopher",
        "source": {
            "type": "gopher",
            "hostname": "gopher.invalid",
        },
        "path": "/Reference",
    }

    self.assertRaises(
        UnknownSchemeError,
        Source.from_json_object,
        gopher_source,
    )
    self.assertRaises(
        UnknownSchemeError,
        Handle.from_json_object,
        gopher_handle,
    )
"name": "test.txt", } # json `data` content needs to be base64 encoded json_gzip = { "type": "data", "content": b64encode(gzip_content), "mime": "application/gzip", "name": "test.txt", } for j in ( json_data, json_gzip, ): s = Source.from_json_object(j) while True: h_generator = s.handles(sm) h = next(h_generator) r = h.follow(sm) #if h.guess_type() == "text/plain": print(f"handle\t{h}") print(f"resource\t{r}") print("raw content:") with r.make_stream() as fp: print("\t\t{0}".format(fp.read())) # should succed for text -> text conversion try:
# Request body: a regex rule plus an inline "data" source. The "content"
# field is the base64 encoding of the text "This is only a test".
body = {
    "rule": {
        "type": "regex",
        "expression": "[Tt]est",
    },
    "source": {
        "type": "data",
        "content": "VGhpcyBpcyBvbmx5IGEgdGVzdA==",
        "mime": "text/plain",
        "name": "test.txt",
    },
}

# Turn the JSON fragments into model objects.
source = Source.from_json_object(body["source"])
# type_label of whatever _get_top() returns for this source
# (presumably the outermost Source in a nested chain -- confirm).
top_type = _get_top(source).type_label
rule = Rule.from_json_object(body["rule"])

# Build the scan tag piece by piece; the timestamp is taken first so the
# call order matches a single nested constructor expression.
_scan_time = time_now()
_scanner_fragment = messages.ScannerFragment(
    pk=0,
    name="API server demand scan",
)
_organisation_fragment = messages.OrganisationFragment(
    name="API server",
    uuid=uuid4(),
)
_scan_tag_fragment = messages.ScanTagFragment(
    time=_scan_time,
    user=None,
    scanner=_scanner_fragment,
    organisation=_organisation_fragment,
)

# Assemble the scan specification and keep only its JSON form.
_scan_spec = messages.ScanSpecMessage(
    scan_tag=_scan_tag_fragment,
    source=source,
    rule=rule,
    configuration={},
    progress=None,
)
message = _scan_spec.to_json_object()
with r.make_stream() as fp: content = fp.read() # same as r.compute_type() implemented in FileResource # we could only read the first 512 bytes to get mime type mtype = mime.from_buffer(content) with open(fname, 'wb') as fh: fh.write(content) # To see how the pipeline can work with data sources of all kinds without # knowing what they are, we can try working with the JSON form of ToySource: from os2datascanner.engine2.model.core import Source, SourceManager sm = SourceManager() generic_source = Source.from_json_object({ "type": "toy", "username": "******", "password": "******" }) print([h.relative_path for h in generic_source.handles(sm)]) """ The description of Handles earlier glossed them as references to "objects". But what is an object? To some extent this depends on the Source. In a filesystem, an object is a file: a named stream of bytes with some metadata. In an email account, an object is an email. In a case management system, an object is a case. But sometimes the lines are blurrier than that. For example, consider a Zip file. It is a file: it's a stream of bytes with a name, a size, and some metadata. It can also, however, be viewed as a container for other files, each of which in turn also has these properties.