def _webProcessResponseGest(self, resp_dict, gest_open):
    '''
    Prepare a web response using an ingest open directive.

    Notes:
        This should be called by the IO thread consuming the response
        data, not the thread responsible for actually retrieving the web
        data.

    Args:
        resp_dict (dict): Response dictionary. When it carries a non-empty
            'data' value, that value is replaced with a
            SpooledTemporaryFile holding the same content (rewound to
            offset 0) and an 'ingdata' key is added holding the result of
            s_ingest.iterdata() over that file.
        gest_open (dict): Ingest open directive. It is expanded as keyword
            arguments to s_ingest.iterdata().

    Returns:
        The untouched resp_dict when it has no (or empty) 'data';
        otherwise None, with resp_dict modified in place.

    Raises:
        Any exception raised while filling the spool file or while calling
        s_ingest.iterdata() is re-raised after the spool file is closed.
        resp_dict is left unmodified in that case.
    '''
    payload = resp_dict.get('data')

    # Fail fast, let the ingest go boom later.
    if not payload:
        return resp_dict

    # SpooledTemporaryFile will reduce memory burden (at the expense of disk
    # space) in the event we get a large amount of data back from an endpoint.
    buf = tempfile.SpooledTemporaryFile(max_size=self.getConfOpt(MAX_SPOOL_FILESIZE))
    try:
        # TODO Loop in chunks in the event we have a large amount of data.
        buf.write(payload)
        buf.seek(0)
        # close_fd=False: the buffer is also handed back through
        # resp_dict['data'], so iterdata is asked not to close it.
        ingdata = s_ingest.iterdata(fd=buf, close_fd=False, **gest_open)
    except Exception:
        # Nothing else references the spool file yet; close it here so a
        # failed write or a bad open directive does not leak it.
        buf.close()
        raise

    # Only touch resp_dict once everything above has succeeded.
    resp_dict['data'] = buf
    resp_dict['ingdata'] = ingdata
    return None
def test_ingest_iterdata(self):
    '''
    Exercise s_ingest.iterdata() over in-memory JSON buffers.

    The test asserts that every yielded item is not None, that the buffer
    is closed after iteration when close_fd is not given, and that it is
    still open after iteration when close_fd=False is passed.
    '''
    records = {
        'foo': [
            {'fqdn': 'com', 'tld': True},
            {'fqdn': 'woot.com'},
        ],
        'bar': [
            {'fqdn': 'vertex.link', 'tld': 0},
        ],
        'newp': [
            {'fqdn': 'newp.com', 'tld': 0},
        ],
    }
    payload = json.dumps(records).encode()

    # No close_fd argument: the buffer is expected to be closed afterwards.
    owned_buf = io.BytesIO(payload)
    for item in s_ingest.iterdata(fd=owned_buf, format='json'):
        self.nn(item)
    self.true(owned_buf.closed)

    # Leave the file descriptor open.
    kept_buf = io.BytesIO(payload)
    for item in s_ingest.iterdata(kept_buf, close_fd=False, format='json'):
        self.nn(item)
    self.false(kept_buf.closed)
    kept_buf.close()
def test_ingest_basic_bufio(self):
    '''
    Ingest JSON read from an in-memory bytes buffer into a ram:// cortex.

    The ingest definition only iterates 'foo/*/fqdn'. The test asserts that
    'com' and 'woot.com' end up as inet:fqdn nodes with the expected
    properties, and that 'newp.com' (under the 'newp' key) does not.
    '''
    # Each matched fqdn becomes an inet:fqdn form whose 'sfx' prop is read
    # from the sibling 'tld' value.
    fqdn_form = ('inet:fqdn', {
        'props': {
            'sfx': {'path': '../tld'},
        }
    })
    gest_def = {
        'ingest': {
            'iters': (
                ('foo/*/fqdn', {'forms': [fqdn_form]}),
            ),
        },
        'open': {
            'format': 'json'
        }
    }

    records = {
        'foo': [
            {'fqdn': 'com', 'tld': True},
            {'fqdn': 'woot.com'},
        ],
        'bar': [
            {'fqdn': 'vertex.link', 'tld': 0},
        ],
        'newp': [
            {'fqdn': 'newp.com', 'tld': 0},
        ],
    }

    with s_cortex.openurl('ram://') as core:

        stream = s_compat.BytesIO(json.dumps(records).encode())
        open_opts = gest_def.get('open')
        ingdata = s_ingest.iterdata(fd=stream, **open_opts)

        gest = s_ingest.Ingest(gest_def)
        for item in ingdata:
            gest.ingest(core, data=item)

        com_node = core.getTufoByProp('inet:fqdn', 'com')
        self.eq(com_node[1].get('inet:fqdn:sfx'), 1)

        woot_node = core.getTufoByProp('inet:fqdn', 'woot.com')
        self.eq(woot_node[1].get('inet:fqdn:zone'), 1)

        self.none(core.getTufoByProp('inet:fqdn', 'newp.com'))