Example #1
0
    def _webProcessResponseGest(self, resp_dict, gest_open):
        '''
        Prepare a web reponse using a ingest open directive.

        Notes:
            This should be called by the IO thread consuming the response
            data, not the thread responsible for actually retrieving the web
            data.

        Args:
            resp_dict (dict): Reponse dictionary. It will have the 'data' field
                              overwritten with a SpooledTemporaryFile and the
                              'ingdata' field added with a generator.
            gest_open (dict): Ingest open directive.

        Returns:
            None
        '''
        # Fail fast, let the ingest go boom later.
        if not resp_dict.get('data'):
            return resp_dict
        # SpooledTemporaryFile will reduce memory burden (as the expanse of disk space)
        # in the event we get a large amount of data back from an endpoint.
        buf = tempfile.SpooledTemporaryFile(
            max_size=self.getConfOpt(MAX_SPOOL_FILESIZE))
        # TODO Loop in chunks in the event we have a large amount of data.
        buf.write(resp_dict.get('data'))
        buf.seek(0)
        # Build the generator and replace 'data' with the generator.
        ingdata = s_ingest.iterdata(fd=buf, close_fd=False, **gest_open)
        resp_dict['data'] = buf
        resp_dict['ingdata'] = ingdata
Example #2
0
    def test_ingest_iterdata(self):
        # Exercise s_ingest.iterdata over in-memory JSON and check what
        # happens to the file object with and without close_fd=False.
        data = {
            'foo': [
                {'fqdn': 'com', 'tld': True},
                {'fqdn': 'woot.com'},
            ],
            'bar': [
                {'fqdn': 'vertex.link', 'tld': 0},
            ],
            'newp': [
                {'fqdn': 'newp.com', 'tld': 0},
            ],
        }
        raw = json.dumps(data).encode()

        # Default call: after the iteration finishes the buffer must be closed.
        closing_buf = io.BytesIO(raw)
        items = s_ingest.iterdata(fd=closing_buf, format='json')
        for item in items:
            self.nn(item)
        self.true(closing_buf.closed)

        # close_fd=False: the buffer must still be open afterwards, so the
        # test closes it itself.
        open_buf = io.BytesIO(raw)
        items = s_ingest.iterdata(open_buf, close_fd=False, format='json')
        for item in items:
            self.nn(item)
        self.false(open_buf.closed)
        open_buf.close()
Example #3
0
    def test_ingest_basic_bufio(self):
        # Feed JSON from an in-memory bytes buffer through s_ingest.iterdata
        # into an Ingest running against a ram:// cortex, then check the
        # resulting inet:fqdn tufos.
        with s_cortex.openurl('ram://') as core:

            # Form definition: make an inet:fqdn and take its 'sfx' prop from
            # the sibling 'tld' value.
            fqdn_form = ('inet:fqdn', {
                'props': {
                    'sfx': {'path': '../tld'},
                },
            })
            open_opts = {'format': 'json'}
            info = {
                'ingest': {
                    'iters': (
                        ('foo/*/fqdn', {'forms': [fqdn_form]}),
                    ),
                },
                'open': open_opts,
            }

            data = {
                'foo': [
                    {'fqdn': 'com', 'tld': True},
                    {'fqdn': 'woot.com'},
                ],
                'bar': [
                    {'fqdn': 'vertex.link', 'tld': 0},
                ],
                'newp': [
                    {'fqdn': 'newp.com', 'tld': 0},
                ],
            }

            raw = json.dumps(data).encode()
            buf = s_compat.BytesIO(raw)

            records = s_ingest.iterdata(fd=buf, **info.get('open'))
            gest = s_ingest.Ingest(info)
            for record in records:
                gest.ingest(core, data=record)

            com_props = core.getTufoByProp('inet:fqdn', 'com')[1]
            self.eq(com_props.get('inet:fqdn:sfx'), 1)

            woot_props = core.getTufoByProp('inet:fqdn', 'woot.com')[1]
            self.eq(woot_props.get('inet:fqdn:zone'), 1)

            # 'newp.com' sits under the 'newp' key, which the iters path does
            # not select, so no node is expected for it.
            self.none(core.getTufoByProp('inet:fqdn', 'newp.com'))