예제 #1
0
파일: generic.py 프로젝트: tensts/n6
 def set_configuration(self):
     """
     Set `self.config` for this instance.

     When `is_config_spec_or_group_declared()` returns a true value,
     the result of `get_config_section()` is stored; otherwise a
     placeholder `ConfigSection` named '<no section declared>' is
     stored instead.
     """
     if not self.is_config_spec_or_group_declared():
         # Backward-compatible fallback needed by a few collectors
         # that have `config_group = None` and, at the same time,
         # no `config_spec`/`config_spec_pattern`.
         self.config = ConfigSection('<no section declared>')
         return
     self.config = self.get_config_section()
예제 #2
0
    def test(self, given_args, conf_section_content,
             expected_rabbitmq_config_section, expected_result):
        """
        Call `get_amqp_connection_params_dict(*given_args)` and check
        both the returned value and the single `section()` call made
        on the mocked `Config`.
        """
        mocked_section = ConfigSection(
            '<irrelevant for these tests>', conf_section_content)
        self.ConfigMock.section.return_value = mocked_section
        expected_spec = RABBITMQ_CONFIG_SPEC_PATTERN.format(
            rabbitmq_config_section=expected_rabbitmq_config_section)
        expected_config_calls = [call.section(expected_spec)]

        actual_result = get_amqp_connection_params_dict(*given_args)

        self.assertEqual(self.ConfigMock.mock_calls, expected_config_calls)
        self.assertEqual(actual_result, expected_result)
예제 #3
0
class TestBaseCollector(unittest.TestCase):
    def test_basics(self):
        """Check the base class and the presence of the class attributes."""
        self.assertTrue(issubclass(BaseCollector, QueuedBase))
        for attr_name in ('output_queue',
                          'raw_format_version_tag',
                          'config_required',
                          'config_group',
                          'type'):
            self.assertTrue(hasattr(BaseCollector, attr_name))

    def test_class_attr_values(self):
        """Check the default values of `BaseCollector` class attributes."""
        expected_output_queue = {
            'exchange': 'raw',
            'exchange_type': 'topic'
        }
        expected_config_required = ('source', )
        self.assertEqual(BaseCollector.output_queue, expected_output_queue)
        self.assertIsNone(BaseCollector.raw_format_version_tag)
        self.assertEqual(BaseCollector.config_required,
                         expected_config_required)
        self.assertIsNone(BaseCollector.config_group)
        self.assertIsNone(BaseCollector.type)

    @foreach(
        # Only `config_group` is declared on the collector class.
        param(
            custom_config_group=SAMPLE_CONFIG_SECTION,
            expected_config=ConfigSection(SAMPLE_CONFIG_SECTION, {
                'required_opt': SAMPLE_REQUIRED_VALUE,
                'some_opt': 'ABc dd'
            }),
        ).label('Attribute `config_group` declared.'),

        # If a `config_group` is not set, the config section
        # gets its name from `config_spec`, but only if it
        # specifies just one section.
        param(
            custom_config_spec='''
                [other_section]
                some_opt :: json
                required_opt :: float
            ''',
            expected_config=ConfigSection(SAMPLE_OTHER_CONFIG_SECTION, {
                'required_opt': 123.89,
                'some_opt': [{
                    'a': 'bcd'
                }]
            }),
        ).label('Attribute `config_spec` declared.'),

        # When both `config_group` and `config_spec` are set,
        # the `config_group` value is used as the section's
        # name. The `config_spec` is still used, though, so if
        # the section specified in `config_spec` is found in
        # a config file, it has to follow the specification,
        # e.g. there cannot be any unspecified options
        # if the specification does not allow them (i.e. it does
        # not end with three dots).
        param(
            custom_config_group=SAMPLE_OTHER_CONFIG_SECTION,
            custom_config_spec='''
                [some_section]
                not_required_opt = 'test' :: str
                ...
            ''',
            expected_config=ConfigSection(SAMPLE_OTHER_CONFIG_SECTION, {
                'required_opt': '123.89',
                'some_opt': '[{"a": "bcd"}]'
            }),
        ).label('Both `config_group` and `config_spec` set.'),

        # When both `config_spec` and `config_required` are set,
        # options from `config_required` are going to be included
        # in the resulting config, even though they are not declared
        # in the `config_spec` and additional options are illegal.
        param(
            custom_config_required=('some_opt', ),
            custom_config_spec='''
                [some_section]
                required_opt :: unicode
            ''',
            expected_config=ConfigSection(
                SAMPLE_CONFIG_SECTION, {
                    'required_opt': u'some option which is required',
                    'some_opt': 'ABc dd'
                }),
        ).label('Attributes `config_spec` and `config_required`.'),

        # A `config_spec` attribute can have more than one section
        # declared if a `config_group` is set (the section name
        # is then taken from `config_group`; see the expected
        # config below).
        param(
            custom_config_group=SAMPLE_OTHER_CONFIG_SECTION,
            custom_config_spec='''
                [some_section]
                required_opt :: unicode
                ...
                [other_section]
                some_opt
                required_opt :: float
                another_opt = 124 :: int
            ''',
            expected_config=ConfigSection(
                SAMPLE_OTHER_CONFIG_SECTION, {
                    'required_opt': 123.89,
                    'some_opt': '[{"a": "bcd"}]',
                    'another_opt': 124
                }),
        ).label('A few sections in `config_spec` and `config_group` set.'),

        # Neither attribute is declared: the placeholder section
        # is expected.
        param(expected_config=CONFIG_WITH_NO_SECTION_DECLARED, ).label(
            'No `config_spec` or `config_group` declared.'),

        # wrong config declarations (each expects `ConfigError`)
        param(
            custom_config_spec='''
                [some_section]
                required_opt :: unicode
                ...
                [other_section]
                some_opt
                required_opt :: float
                ...
            ''',
            expected_exc=ConfigError,
        ).label(
            'A few sections in the `config_spec` declared and a `config_group` not set.'
        ),
        param(
            custom_config_group=SAMPLE_CONFIG_SECTION,
            custom_config_spec='''
                [other_section]
                wrong_opt = "Option not in config file."
            ''',
            expected_exc=ConfigError,
        ).label("Invalid `config_spec`."),
        param(
            custom_config_group='group_not_found',
            expected_exc=ConfigError,
        ).label("The config section does not exist in config files."),
    )
    # NOTE(review): presumably the two `foreach` decorators combine, so
    # that each case above is run for each source type below -- confirm
    # against the semantics of `foreach`.
    @foreach(
        param(source_type='stream'),
        param(source_type='file'),
        param(source_type='blacklist'),
    )
    def test__init(self,
                   source_type,
                   custom_config_required=SAMPLE_CONFIG_REQUIRED,
                   custom_config_group=None,
                   custom_config_spec=None,
                   expected_config=None,
                   expected_exc=None):
        """
        Instantiate a `BaseCollector` subclass built from the given
        class attributes and check the outcome.

        Exactly one kind of outcome is expected per case: either
        `expected_exc` is raised on instantiation, or the instance is
        created and its `config` equals `expected_config`.
        """
        # A throwaway subclass whose class attributes come from the
        # test parameters.
        class SomeCollector(BaseCollector):
            config_group = custom_config_group
            config_required = custom_config_required
            config_spec = custom_config_spec
            type = source_type

        # `MOCKED_SUPER_CLS` is a module-level object, so recorded calls
        # from previous cases have to be cleared first.
        MOCKED_SUPER_CLS.__init__.reset_mock()
        with STDERR_PATCHER, SUPER_PATCHER as super_mock, CONFIG_PATCHER:
            # instantiation
            if expected_exc is not None:
                assert expected_config is None, (
                    "A single test case cannot expect both "
                    "exception and config.")
                with self.assertRaises(expected_exc):
                    instance = SomeCollector(a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B)
            else:
                instance = SomeCollector(a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B)
                # assert that an instance of the proper type has been returned
                self.assertIsInstance(instance, SomeCollector)
                # assert that super used properly
                super_mock.assert_called_once_with(BaseCollector, instance)
                MOCKED_SUPER_CLS.__init__.assert_called_once_with(
                    a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B)
                self.assertEqual(instance.config, expected_config)

    def test__init__type_not_valid(self):
        """Instantiation raises an exception if `type` is not valid."""
        class SomeCollector(BaseCollector):
            type = 'olala'
            config_required = SAMPLE_CONFIG_REQUIRED
            config_group = SAMPLE_CONFIG_SECTION

        with SUPER_PATCHER, CONFIG_PATCHER:
            self.assertRaises(Exception,
                              SomeCollector,
                              a=SAMPLE_ARG_A,
                              bb=SAMPLE_ARG_B)

    def test__init__type_not_set(self):
        """Instantiation raises `NotImplementedError` if `type` is not set."""
        class SomeCollector(BaseCollector):
            config_required = SAMPLE_CONFIG_REQUIRED
            config_group = SAMPLE_CONFIG_SECTION

        with SUPER_PATCHER, CONFIG_PATCHER:
            self.assertRaises(NotImplementedError,
                              SomeCollector,
                              a=SAMPLE_ARG_A,
                              bb=SAMPLE_ARG_B)

    def test__get_script_init_kwargs(self):
        """`get_script_init_kwargs()` returns an empty dict."""
        script_init_kwargs = BaseCollector.get_script_init_kwargs()
        self.assertEqual(script_init_kwargs, {})

    def test__run_handling__interrupted(self):
        """When `run()` raises `KeyboardInterrupt`, `stop()` is called once."""
        interrupted_run = Mock(side_effect=KeyboardInterrupt)
        collector_mock = Mock(__class__=BaseCollector, run=interrupted_run)
        BaseCollector.run_handling(collector_mock)
        collector_mock.run.assert_called_once_with()
        collector_mock.stop.assert_called_once_with()

    def test__run_handling__not_interrupted(self):
        """When `run()` finishes normally, `stop()` is not called."""
        collector_mock = Mock(__class__=BaseCollector)
        BaseCollector.run_handling(collector_mock)
        collector_mock.run.assert_called_once_with()
        stop_calls = collector_mock.stop.mock_calls
        self.assertEqual(stop_calls, [])

    def test__get_output_components(self):
        """
        Check the values returned by `get_output_components()` and the
        exact sequence of helper-method calls it makes on the collector.
        """
        collector_mock = Mock(
            __class__=BaseCollector,
            process_input_data=Mock(
                return_value=dict(ccc=sentinel.ccc, dddd=sentinel.dddd)),
            get_source_channel=Mock(return_value=SAMPLE_SOURCE_CHANNEL),
            get_source=Mock(return_value=SAMPLE_SOURCE),
            get_output_rk=Mock(return_value=SAMPLE_OUTPUT_RK),
            get_output_data_body=Mock(return_value=SAMPLE_OUTPUT_DATA_BODY),
            get_output_prop_kwargs=Mock(
                return_value=SAMPLE_OUTPUT_PROP_KWARGS))
        # The helper calls expected on the collector, in order.
        expected_calls = [
            call.process_input_data(a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B),
            call.get_source_channel(ccc=sentinel.ccc, dddd=sentinel.dddd),
            call.get_source(source_channel=SAMPLE_SOURCE_CHANNEL,
                            ccc=sentinel.ccc,
                            dddd=sentinel.dddd),
            call.get_output_rk(
                source=SAMPLE_SOURCE, ccc=sentinel.ccc, dddd=sentinel.dddd),
            call.get_output_data_body(
                source=SAMPLE_SOURCE, ccc=sentinel.ccc, dddd=sentinel.dddd),
            call.get_output_prop_kwargs(
                source=SAMPLE_SOURCE,
                output_data_body=SAMPLE_OUTPUT_DATA_BODY,
                ccc=sentinel.ccc,
                dddd=sentinel.dddd),
        ]

        # the call
        components = BaseCollector.get_output_components(
            collector_mock, a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B)
        output_rk, output_data_body, output_prop_kwargs = components

        # assertions
        self.assertIs(output_rk, SAMPLE_OUTPUT_RK)
        self.assertIs(output_data_body, SAMPLE_OUTPUT_DATA_BODY)
        self.assertIs(output_prop_kwargs, SAMPLE_OUTPUT_PROP_KWARGS)
        self.assertEqual(collector_mock.mock_calls, expected_calls)

    def test__process_input_data(self):
        """`process_input_data()` returns its keyword arguments as a dict."""
        collector_mock = Mock(__class__=BaseCollector)
        result = BaseCollector.process_input_data(
            collector_mock, a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B)
        expected = dict(a=SAMPLE_ARG_A, bb=SAMPLE_ARG_B)
        self.assertEqual(result, expected)

    def test__get_source_channel(self):
        """`get_source_channel()` raises `NotImplementedError`."""
        collector_mock = Mock(__class__=BaseCollector)
        self.assertRaises(NotImplementedError,
                          BaseCollector.get_source_channel,
                          collector_mock)

    def test__get_source(self):
        """`get_source()` joins the configured source label and the channel."""
        collector_config = dict(source='my_src_label')
        collector_mock = Mock(__class__=BaseCollector,
                              config=collector_config)
        result = BaseCollector.get_source(
            collector_mock,
            'my_src_channel',
            blablabla=sentinel.blablabla)
        self.assertEqual(result, 'my_src_label.my_src_channel')

    def test__get_output_rk(self):
        """With no version tag, the routing key equals the given source."""
        collector_mock = Mock(__class__=BaseCollector,
                              raw_format_version_tag=None)
        routing_key = BaseCollector.get_output_rk(
            collector_mock,
            'my_src_label.my_src_channel',
            blablabla=sentinel.blablabla)
        self.assertEqual(routing_key, 'my_src_label.my_src_channel')

    def test__get_output_rk__with__raw_format_version_tag(self):
        """With a version tag set, the tag is appended to the routing key."""
        collector_mock = Mock(__class__=BaseCollector,
                              raw_format_version_tag='33333')
        routing_key = BaseCollector.get_output_rk(
            collector_mock,
            'my_src_label.my_src_channel',
            blablabla=sentinel.blablabla)
        self.assertEqual(routing_key, 'my_src_label.my_src_channel.33333')

    def test__get_output_data_body(self):
        """`get_output_data_body()` raises `NotImplementedError`."""
        collector_mock = Mock(__class__=BaseCollector)
        self.assertRaises(NotImplementedError,
                          BaseCollector.get_output_data_body,
                          collector_mock,
                          SAMPLE_SOURCE,
                          blablabla=sentinel.blablabla)

    @foreach(
        param(source_type='stream'),
        param(source_type='file'),
        param(source_type='blacklist'),
    )
    def test__get_output_prop_kwargs(self, source_type):
        """
        Check the properties dict built by `get_output_prop_kwargs()`
        for each source type, with `time.time` patched.
        """
        created_timestamp = 1234
        message_id_getter = Mock(return_value=SAMPLE_MESSAGE_ID)
        collector_mock = Mock(__class__=BaseCollector,
                              type=source_type,
                              content_type=SAMPLE_CONTENT_TYPE,
                              get_output_message_id=message_id_getter)
        expected_props = {
            'message_id': SAMPLE_MESSAGE_ID,
            'type': source_type,
            'timestamp': 1234,
            'headers': {}
        }
        # if the source is of the type "stream" -- `content_type` is
        # not expected among the properties
        if source_type != 'stream':
            expected_props['content_type'] = SAMPLE_CONTENT_TYPE

        with patch('time.time', return_value=created_timestamp) as time_mock:
            # the call
            actual_props = BaseCollector.get_output_prop_kwargs(
                collector_mock,
                source=SAMPLE_SOURCE,
                output_data_body=SAMPLE_OUTPUT_DATA_BODY,
                arg_a=SAMPLE_ARG_A)
            # assertions
            time_mock.assert_called_once_with()
            collector_mock.get_output_message_id.assert_called_once_with(
                source=SAMPLE_SOURCE,
                created_timestamp=created_timestamp,
                output_data_body=SAMPLE_OUTPUT_DATA_BODY,
                arg_a=SAMPLE_ARG_A)
            self.assertEqual(actual_props, expected_props)

    @foreach(
        param(source_type='stream'),
        param(source_type='file'),
        param(source_type='blacklist'),
    )
    def test__get_output_prop_kwargs_content_type_not_set(self, source_type):
        """
        Check `get_output_prop_kwargs()` when the collector has no
        `content_type`: `AttributeError` is expected unless the source
        type is "stream".
        """
        created_timestamp = 1234
        collector_mock = Mock(
            spec=BaseCollector,
            type=source_type,
            get_output_message_id=Mock(return_value=SAMPLE_MESSAGE_ID))
        call_kwargs = dict(source=SAMPLE_SOURCE,
                           output_data_body=SAMPLE_OUTPUT_DATA_BODY,
                           arg_a=SAMPLE_ARG_A)
        with patch('time.time', return_value=created_timestamp) as time_mock:
            if source_type != 'stream':
                with self.assertRaises(AttributeError):
                    BaseCollector.get_output_prop_kwargs(
                        collector_mock, **call_kwargs)
                return

            # collectors handling the sources of "stream" type do not
            # need the `content_type` to be set
            actual_props = BaseCollector.get_output_prop_kwargs(
                collector_mock, **call_kwargs)
            # assertions
            time_mock.assert_called_once_with()
            collector_mock.get_output_message_id.assert_called_once_with(
                source=SAMPLE_SOURCE,
                created_timestamp=created_timestamp,
                output_data_body=SAMPLE_OUTPUT_DATA_BODY,
                arg_a=SAMPLE_ARG_A)
            expected_props = {
                'message_id': SAMPLE_MESSAGE_ID,
                'type': source_type,
                'timestamp': 1234,
                'headers': {}
            }
            self.assertEqual(actual_props, expected_props)

    def test__get_output_message_id(self):
        """
        The message id equals the MD5 hex digest of the source, the
        timestamp (as a string) and the data body, joined with NUL
        characters.
        """
        source = 'my_src_label.my_src_channel'
        created_timestamp = 1234
        created_timestamp_str = '1234'
        output_data_body = '1234'
        collector_mock = Mock(__class__=BaseCollector)

        actual_message_id = BaseCollector.get_output_message_id(
            collector_mock, source, created_timestamp, output_data_body)

        ### XXX CR: rather hardcode a few specific md5s instead of:
        hashed_input = '\0'.join(
            (source, created_timestamp_str, output_data_body))
        expected_message_id = hashlib.md5(hashed_input).hexdigest()
        self.assertEqual(actual_message_id, expected_message_id)
예제 #4
0
    BaseCollector,
    BaseEmailSourceCollector,
    BaseOneShotCollector,
    BaseTimeOrderedRowsCollector,
    BaseUrlDownloaderCollector,
)
from n6.tests.collectors._collectors_test_helpers import _BaseCollectorTestCase

# Sentinel placeholders passed around by the tests.
SAMPLE_ARG_A = sentinel.a
SAMPLE_ARG_B = sentinel.b
SAMPLE_EMAIL_MESSAGE = sentinel.email_msg
SAMPLE_INPUT_DATA = sentinel.input_data
SAMPLE_MESSAGE_ID = sentinel.message_id
SAMPLE_SOURCE = sentinel.source

# Names of the config sections used by the tests.
SAMPLE_CONFIG_SECTION = "some_section"
SAMPLE_OTHER_CONFIG_SECTION = "other_section"

# Required option names and the raw (string) option values.
SAMPLE_CONFIG_REQUIRED = ('required_opt', )
SAMPLE_REQUIRED_VALUE = "some option which is required"
SAMPLE_OTHER_REQUIRED_VALUE = "123.89"

# The config expected when no section is declared at all.
CONFIG_WITH_NO_SECTION_DECLARED = ConfigSection('<no section declared>')

# Raw config content, keyed by section name and then by option name.
MOCKED_CONFIG = {
    'some_section': {
        'required_opt': SAMPLE_REQUIRED_VALUE,
        'some_opt': "ABc dd",
    },
    'other_section': {
        'required_opt': SAMPLE_OTHER_REQUIRED_VALUE,
        'some_opt': '[{"a": "bcd"}]',
    },
}

# Stand-in for the object returned by the patched `super()`.
MOCKED_SUPER_CLS = PlainNamespace(__init__=Mock())
예제 #5
0
class TestBaseParser(unittest.TestCase):
    def setUp(self):
        """Prepare a `BaseParser`-like mock and a method proxy bound to it."""
        parser_mock = Mock(__class__=BaseParser, allow_empty_results=False)
        self.mock = parser_mock
        self.meth = MethodProxy(BaseParser, parser_mock)

    def _asserts_of_proper__new__instance_adjustment(self, instance):
        """
        Assert that the instance's `input_queue` is not the very same
        object as the class-level `BaseParser.input_queue`.
        """
        # BaseQueued.__new__() ensures that
        instance_queue = instance.input_queue
        self.assertIsNot(instance_queue, BaseParser.input_queue)

    def _asserts_of_proper_preinit_hook_instance_adjustment(
            self, instance, binding_key):
        """
        Assert that the instance's `input_queue` has been extended with
        `queue_name` and `binding_keys` derived from `binding_key`,
        while the class-level `BaseParser.input_queue` is unchanged.
        """
        # for classes with `default_binding_key`
        # BaseParser.preinit_hook() ensures that
        expected_instance_queue = {
            'exchange': 'raw',
            'exchange_type': 'topic',
            'queue_name': binding_key,
            'binding_keys': [binding_key],
        }
        expected_class_queue = {
            'exchange': 'raw',
            'exchange_type': 'topic',
        }
        self.assertEqual(instance.input_queue, expected_instance_queue)
        self.assertEqual(BaseParser.input_queue, expected_class_queue)

    def _basic_init_related_asserts(self, instance, subclass, super_mock,
                                    super_cls_mock, expected_config,
                                    expected_config_full):
        """
        Run the assertions common to the initialization tests: the
        instance type, the use of `super()` and the `config` /
        `config_full` attributes.
        """
        # assert that an instance of the proper type has been returned
        self.assertIsInstance(instance, subclass)

        # assert that super used properly
        super_mock.assert_called_once_with(BaseParser, instance)
        super_init_mock = super_cls_mock.__init__
        super_init_mock.assert_called_once_with(a=sentinel.a, bb=sentinel.bb)

        # assert that configuration stuff has been obtained properly
        section_config = instance.config
        self.assertEqual(section_config, expected_config)
        self.assertIsInstance(instance.config, ConfigSection)
        full_config = instance.config_full
        self.assertEqual(full_config, expected_config_full)
        self.assertIsInstance(instance.config_full, Config)

    def test_basics(self):
        """Check the base class and the presence of the class attributes."""
        self.assertTrue(issubclass(BaseParser, QueuedBase))
        for attr_name in ('default_binding_key',
                          'config_spec_pattern',
                          'constant_items',
                          'record_dict_class',
                          'event_type'):
            self.assertTrue(hasattr(BaseParser, attr_name))

    def test_config_spec_pattern(self):
        """
        The formatted `config_spec_pattern` declares a `prefetch_count`
        option with the `int` converter in the parser's section.
        """
        spec_text = BaseParser.config_spec_pattern.format(
            parser_class_name='example_foo')
        parsed_spec = parse_config_spec(spec_text)
        opt_spec = parsed_spec.get_opt_spec('example_foo.prefetch_count')
        self.assertEqual(opt_spec.name, 'prefetch_count')
        self.assertEqual(opt_spec.converter_spec, 'int')

    def test_initialization_without_default_binding_key(self):
        """
        A subclass without `default_binding_key` cannot be instantiated,
        and `__new__()` leaves its `input_queue` content equal to the
        class-level one.
        """
        class SomeParser(BaseParser):
            pass  # no `default_binding_key` defined => it's an abstract class

        self.assertRaises(NotImplementedError, SomeParser)

        bare_instance = SomeParser.__new__(SomeParser)
        self._asserts_of_proper__new__instance_adjustment(bare_instance)
        # for classes without `default_binding_key`
        # `queue_name` and `binding_keys` items are *not* added...
        expected_class_queue = {
            'exchange': 'raw',
            'exchange_type': 'topic',
        }
        self.assertEqual(bare_instance.input_queue, BaseParser.input_queue)
        self.assertEqual(BaseParser.input_queue, expected_class_queue)

    @foreach(
        # No config content loaded from files: `prefetch_count` is
        # expected to be 1.
        param(
            mocked_conf_from_files={},
            expected_config=ConfigSection('SomeParser', {'prefetch_count': 1}),
            expected_config_full=Config.make(
                {'SomeParser': {
                    'prefetch_count': 1
                }}),
        ),
        # The loaded `prefetch_count` is expected as an int; the section
        # not covered by the spec is not expected in the result.
        param(
            mocked_conf_from_files={
                'SomeParser': {
                    'prefetch_count': '42'
                },
                'another_section': {
                    'another_opt': '123.456'
                },
            },
            expected_config=ConfigSection('SomeParser',
                                          {'prefetch_count': 42}),
            expected_config_full=Config.make(
                {'SomeParser': {
                    'prefetch_count': 42
                }}),
        ),
        # A custom spec pattern, extending the base one with an extra
        # option and an extra section.
        param(
            custom_config_spec_pattern=concat_reducing_indent(
                BaseParser.config_spec_pattern,
                '''
                    some_opt = [-3, null] :: json
                    [another_section]
                    another_opt :: float
                    yet_another_opt = Foo Bar Spam Ham
                ''',
            ),
            mocked_conf_from_files={
                'SomeParser': {
                    'prefetch_count': '42'
                },
                'another_section': {
                    'another_opt': '123.456'
                },
            },
            expected_config=ConfigSection('SomeParser', {
                'prefetch_count': 42,
                'some_opt': [-3, None],
            }),
            expected_config_full=Config.make({
                'SomeParser': {
                    'prefetch_count': 42,
                    'some_opt': [-3, None],
                },
                'another_section': {
                    'another_opt': 123.456,
                    'yet_another_opt': 'Foo Bar Spam Ham',
                },
            }),
        ),
    )
    # NOTE(review): presumably the two `foreach` decorators combine, so
    # that each case above is run for each binding key below -- confirm
    # against the semantics of `foreach`.
    @foreach(
        param(binding_key='foo.bar'),
        param(binding_key='foo.bar.33'),
    )
    def test_initialization_with_default_binding_key(
            self,
            binding_key,
            mocked_conf_from_files,
            expected_config,
            expected_config_full,
            custom_config_spec_pattern=None):
        """
        Instantiate a concrete `BaseParser` subclass (one that defines
        `default_binding_key`) and check its `input_queue`, the use of
        `super()` and the resulting `config` / `config_full`.

        The config content loaded from files is replaced with
        `mocked_conf_from_files`.
        """
        class SomeParser(BaseParser):
            default_binding_key = binding_key  # => it's a concrete class

        if custom_config_spec_pattern is not None:
            SomeParser.config_spec_pattern = custom_config_spec_pattern

        # The queue-related adjustments are checked right after
        # `__new__()`, i.e. before `__init__()` is run.
        unready_instance = SomeParser.__new__(SomeParser)
        self._asserts_of_proper__new__instance_adjustment(unready_instance)
        self._asserts_of_proper_preinit_hook_instance_adjustment(
            unready_instance, binding_key)

        super_cls_mock = SimpleNamespace(__init__=Mock())
        with patch_always('n6.parsers.generic.super',
                          return_value=super_cls_mock) as super_mock, \
             patch('n6.parsers.generic.Config._load_n6_config_files',
                   return_value=mocked_conf_from_files):
            # instantiation
            instance = SomeParser(a=sentinel.a, bb=sentinel.bb)
            # the same queue-related assertions are repeated for the
            # fully initialized instance
            self._asserts_of_proper__new__instance_adjustment(instance)
            self._asserts_of_proper_preinit_hook_instance_adjustment(
                instance, binding_key)
            self._basic_init_related_asserts(instance, SomeParser, super_mock,
                                             super_cls_mock, expected_config,
                                             expected_config_full)

    def test__make_binding_keys(self):
        """`make_binding_keys()` returns a list with the default key only."""
        self.mock.default_binding_key = 'fooo.barr'
        self.assertEqual(self.meth.make_binding_keys(), ['fooo.barr'])
        # no calls on the mocked parser are expected
        self.assertEqual(self.mock.mock_calls, [])

    def test__make_binding_keys_with_raw_format_version_tag(self):
        """A default key that includes a version tag is returned unchanged."""
        self.mock.default_binding_key = 'fooo.barr.33'
        self.assertEqual(self.meth.make_binding_keys(), ['fooo.barr.33'])
        # no calls on the mocked parser are expected
        self.assertEqual(self.mock.mock_calls, [])

    def test__get_script_init_kwargs(self):
        """
        `get_script_init_kwargs` is a classmethod that returns an empty
        dict and makes no calls on the object it is given.
        """
        raw_attr = vars(BaseParser)['get_script_init_kwargs']
        self.assertIsInstance(raw_attr, classmethod)
        underlying_func = BaseParser.get_script_init_kwargs.__func__
        script_init_kwargs = underlying_func(self.mock)
        self.assertEqual(script_init_kwargs, {})
        self.assertEqual(self.mock.mock_calls, [])

    def test__run_handling__interrupted(self):
        """When `run()` raises `KeyboardInterrupt`, `stop()` is called once."""
        self.mock.run.side_effect = KeyboardInterrupt
        self.meth.run_handling()
        self.mock.run.assert_called_once_with()
        self.mock.stop.assert_called_once_with()

    def test__run_handling__not_interrupted(self):
        """When `run()` finishes normally, `stop()` is not called."""
        self.meth.run_handling()
        self.mock.run.assert_called_once_with()
        stop_calls = self.mock.stop.mock_calls
        self.assertEqual(stop_calls, [])

    @patch('n6.parsers.generic.FilePagedSequence')
    def test__input_callback(self, FilePagedSequence_mock):
        """
        Check the exact sequence of calls `input_callback()` makes on
        the parser and on the (patched) `FilePagedSequence`.
        """
        seq_context_manager = MagicMock()
        FilePagedSequence_mock.return_value = seq_context_manager
        seq_context_manager.__enter__.return_value = sentinel.working_seq
        data = MagicMock(**{'get.return_value': sentinel.rid})
        parser_mock_config = {
            '_fix_body.return_value': sentinel.body,
            'prepare_data.return_value': data,
            'setting_error_event_info': MagicMock(),
            'get_output_rk.return_value': sentinel.output_rk,
            'get_output_bodies.return_value': [
                sentinel.output_body1,
                sentinel.output_body2,
            ],
        }
        self.mock.configure_mock(**parser_mock_config)

        self.meth.input_callback(sentinel.routing_key,
                                 sentinel.body,
                                 sentinel.properties)

        # The calls expected on the parser, in order.
        expected_parser_calls = [
            call._fix_body(sentinel.body),
            call.prepare_data(sentinel.routing_key, sentinel.body,
                              sentinel.properties),
            call.prepare_data().get('properties.message_id'),
            call.setting_error_event_info(sentinel.rid),
            call.setting_error_event_info().__enter__(),
            call.get_output_rk(data),
            call.get_output_bodies(data, sentinel.working_seq),
            call.publish_output(routing_key=sentinel.output_rk,
                                body=sentinel.output_body1),
            call.publish_output(routing_key=sentinel.output_rk,
                                body=sentinel.output_body2),
            call.setting_error_event_info().__exit__(None, None, None),
        ]
        # The calls expected on the patched `FilePagedSequence`.
        expected_seq_calls = [
            call(page_size=1000),
            call().__enter__(),
            call().__exit__(None, None, None),
        ]
        self.assertEqual(self.mock.mock_calls, expected_parser_calls)
        self.assertEqual(FilePagedSequence_mock.mock_calls,
                         expected_seq_calls)

    def test__prepare_data(self):
        """
        Check the dict built by `prepare_data()` for a routing key
        without a raw format version tag.
        """
        amqp_properties = SimpleNamespace(
            foo=sentinel.foo,
            bar=sentinel.bar,
            timestamp=1389348840,
            headers={'a': sentinel.a})
        expected_data = {
            'a': sentinel.a,
            'properties.foo': sentinel.foo,
            'properties.bar': sentinel.bar,
            'source': 'ham.spam',
            'properties.timestamp': '2014-01-10 10:14:00',
            'raw_format_version_tag': None,
            'raw': sentinel.body,
        }
        prepared = self.meth.prepare_data(routing_key='ham.spam',
                                          body=sentinel.body,
                                          properties=amqp_properties)
        self.assertEqual(prepared, expected_data)

    def test__prepare_data__rk__with_raw_format_version_tag(self):
        """
        Check the dict built by `prepare_data()` for a routing key
        whose last segment is a raw format version tag.
        """
        amqp_properties = SimpleNamespace(
            foo=sentinel.foo,
            bar=sentinel.bar,
            timestamp=1389348840,
            headers={'a': sentinel.a})
        expected_data = {
            'a': sentinel.a,
            'properties.foo': sentinel.foo,
            'properties.bar': sentinel.bar,
            'source': 'ham.spam',
            'properties.timestamp': '2014-01-10 10:14:00',
            'raw_format_version_tag': '33',
            'raw': sentinel.body,
        }
        prepared = self.meth.prepare_data(routing_key='ham.spam.33',
                                          body=sentinel.body,
                                          properties=amqp_properties)
        self.assertEqual(prepared, expected_data)

    def test__get_output_rk(self):
        """The output routing key is '<event_type>.parsed.<source>'."""
        self.mock.event_type = 'foobar'
        routing_key = self.meth.get_output_rk({'source': 'ham.spam'})
        self.assertEqual(routing_key, 'foobar.parsed.ham.spam')

    def test__get_output_bodies(self):
        """
        Check that `get_output_bodies()` returns the given sequence
        filled with the ready JSON of each parsed record dict, and
        check the exact calls made on the record dicts and the parser.
        """
        def _return_parsed(data, parsed, total, item_no):
            return parsed

        record_dicts = [
            MagicMock(**{
                '__class__': RecordDict,
                'used_as_context_manager': True,
                'get_ready_json.return_value':
                    getattr(sentinel, 'output_body{}'.format(i)),
            })
            for i in (1, 2)
        ]
        first_rd, second_rd = record_dicts
        self.mock.configure_mock(**{
            'parse.return_value': record_dicts,
            'get_output_message_id.side_effect': [
                sentinel.msg_A,
                sentinel.msg_B,
            ],
            'setting_error_event_info': MagicMock(),
            'postprocess_parsed.side_effect': _return_parsed,
        })
        seq_mock = FilePagedSequence._instance_mock()

        output_bodies = self.meth.get_output_bodies(sentinel.data, seq_mock)

        self.assertIs(output_bodies, seq_mock)
        expected_bodies = [sentinel.output_body1, sentinel.output_body2]
        self.assertEqual(seq_mock._list, expected_bodies)
        # calls expected on each of the record dicts
        self.assertEqual(first_rd.mock_calls, [
            call.__setitem__('id', sentinel.msg_A),
            call.get_ready_json(),
        ])
        self.assertEqual(second_rd.mock_calls, [
            call.__setitem__('id', sentinel.msg_B),
            call.get_ready_json(),
        ])
        # calls expected on the parser, in order
        expected_parser_calls = [
            call.parse(sentinel.data),
            call.get_output_message_id(first_rd),
            call.delete_too_long_address(first_rd),
            call.get_output_message_id(second_rd),
            call.delete_too_long_address(second_rd),
            call.setting_error_event_info(first_rd),
            call.setting_error_event_info().__enter__(),
            call.postprocess_parsed(sentinel.data, first_rd, 2, item_no=1),
            call.setting_error_event_info().__exit__(None, None, None),
            call.setting_error_event_info(second_rd),
            call.setting_error_event_info().__enter__(),
            call.postprocess_parsed(sentinel.data, second_rd, 2, item_no=2),
            call.setting_error_event_info().__exit__(None, None, None),
        ]
        self.assertEqual(self.mock.mock_calls, expected_parser_calls)

    def test__get_output_bodies__record_dict_not_used_as_context_manager(self):
        # Record dicts flagged as not used as a context manager are
        # expected to make get_output_bodies() fail with AssertionError
        # right after parse() has been called.
        mock_settings = {
            '__class__': RecordDict,
            'used_as_context_manager': False,
        }
        record_mocks = [MagicMock(**mock_settings) for _ in range(2)]
        self.mock.configure_mock(**{'parse.return_value': record_mocks})

        with self.assertRaises(AssertionError):
            output_seq = FilePagedSequence._instance_mock()
            self.meth.get_output_bodies(sentinel.data, output_seq)

        expected_method_calls = [call.parse(sentinel.data)]
        self.assertEqual(self.mock.method_calls, expected_method_calls)

    def test__get_output_bodies__parse_yielded_no_items(self):
        # parse() yielding nothing is expected to end with ValueError
        # (here `allow_empty_results` is not explicitly configured).
        no_items = iter([])
        self.mock.configure_mock(**{'parse.return_value': no_items})

        with self.assertRaises(ValueError):
            output_seq = FilePagedSequence._instance_mock()
            self.meth.get_output_bodies(sentinel.data, output_seq)

        expected_method_calls = [call.parse(sentinel.data)]
        self.assertEqual(self.mock.method_calls, expected_method_calls)

    def test__get_output_bodies__parse_yielded_no_items__allow_empty_results(
            self):
        # With `allow_empty_results` set to True, an empty parse() result
        # is expected to give back the (still empty) output sequence.
        mock_settings = {
            'parse.return_value': iter([]),
            'allow_empty_results': True,
        }
        self.mock.configure_mock(**mock_settings)
        output_seq = FilePagedSequence._instance_mock()

        returned = self.meth.get_output_bodies(sentinel.data, output_seq)

        self.assertIs(returned, output_seq)
        self.assertEqual(output_seq._list, [])  # nothing has been stored
        expected_calls = [call.parse(sentinel.data)]
        self.assertEqual(self.mock.mock_calls, expected_calls)

    def test__delete_too_long_address__address_is_ok(self):
        # Exactly MAX_IPS_IN_ADDRESS items in 'address': the record is
        # expected to be left as it was.

        def _make_record():
            record = RecordDict()
            record['address'] = [
                {'ip': ip_no}
                for ip_no in xrange(1, MAX_IPS_IN_ADDRESS + 1)]
            return record

        parsed = _make_record()
        expected = _make_record()

        self.meth.delete_too_long_address(parsed)

        self.assertEqual(parsed, expected)

    def test__delete_too_long_address__address_is_too_long(self):
        # One item more than MAX_IPS_IN_ADDRESS: the 'address' item is
        # expected to disappear, leaving only 'id' in the record.
        record_id = '0123456789abcdef0123456789abcdef'
        too_many = MAX_IPS_IN_ADDRESS + 1

        parsed = RecordDict()
        parsed['id'] = record_id
        parsed['address'] = [
            {'ip': ip_no} for ip_no in xrange(1, too_many + 1)]
        expected = RecordDict()
        expected['id'] = record_id

        self.meth.delete_too_long_address(parsed)

        self.assertEqual(parsed, expected)

    def test__delete_too_long_address__address_is_empty(self):
        # A record without any 'address' item is expected to stay
        # unchanged.

        def _make_record():
            record = RecordDict()
            record.update({'source': 'foo.bar'})
            return record

        parsed = _make_record()
        expected = _make_record()

        self.meth.delete_too_long_address(parsed)

        self.assertEqual(parsed, expected)

    def test__get_output_message_id(self):
        # Table of `(input record content, expected "hash base")` pairs.
        # For each pair the loop at the end of this method expects
        # `get_output_message_id()` to return the MD5 hex digest of the
        # given hash base string.
        #
        # NOTE(review): the `L`-suffixed long literals and the deliberate
        # mixing of plain and `u''` string literals are Python 2
        # constructs -- presumably this test targets Python 2 only;
        # confirm before porting.
        inputs_and_resultant_hash_bases = [
            # basics
            ({
                'source': 'foo.bar'
            }, 'source,foo.bar'),
            ({
                u'source': u'foo.bar'
            }, 'source,foo.bar'),
            # proper sorting of multiple values
            ({
                'key1': 2,
                'key2': ['value2', 'value3', 'value1']
            }, 'key1,2\nkey2,value1,value2,value3'),
            # ...and of keys + proper encoding of unicode keys/values
            ({
                u'key2': [u'value3', u'value1', u'value2'],
                u'key1': 2L
            }, 'key1,2\nkey2,value1,value2,value3'),
            # ...as well as proper int/long normalization/representation
            ({
                u'key2': [30, 10, 20L],
                u'key1': 9000111222333444555666777888999000L
            }, 'key1,9000111222333444555666777888999000\nkey2,10,20,30'),
            # non-ascii values
            ({
                'target': 'zażółć',
                u'client': [u'jaźń', u'gęślą']
            }, 'client,gęślą,jaźń\ntarget,zażółć'),
            ({
                u'target': u'zażółć',
                'client': ['jaźń', 'gęślą']
            }, 'client,gęślą,jaźń\ntarget,zażółć'),
            # subdicts
            ({
                'dip': u'3.3.3.3',
                u'address': [{
                    'ip': '255.255.255.0'
                }, {
                    'ip': '127.0.0.1'
                }]
            },
             "address,{'ip': '127.0.0.1'},{'ip': '255.255.255.0'}\ndip,3.3.3.3"
             ),
            # non-ascii subdict keys/values
            # (in the expected strings they appear as backslash-escaped
            # bytes, e.g. `\xc4\x85` for `ą`)
            ({
                u'key2': [{
                    'ką2': 'vą2'
                }, {
                    'ką1': 'vą1'
                }],
                'key1': {
                    'ką': 'vą'
                }
            }, "key1,{'k\\xc4\\x85': 'v\\xc4\\x85'}\n" +
             "key2,{'k\\xc4\\x851': 'v\\xc4\\x851'},{'k\\xc4\\x852': 'v\\xc4\\x852'}"
             ),
            # proper encoding of unicode keys/values + proper sorting of whole subdicts
            ({
                'key1': {
                    u'ką': u'vą'
                },
                'key2': [{
                    u'ką2': 'vą2'
                }, {
                    'ką1': u'vą1'
                }]
            }, "key1,{'k\\xc4\\x85': 'v\\xc4\\x85'}\n" +
             "key2,{'k\\xc4\\x851': 'v\\xc4\\x851'},{'k\\xc4\\x852': 'v\\xc4\\x852'}"
             ),
            # ...as well as proper int/long normalization/representation
            ({
                'key1': {
                    u'k': 2L
                },
                'key2': [{
                    'k2': 2L
                }, {
                    u'k1': 1
                }]
            }, "key1,{'k': 2}\nkey2,{'k1': 1},{'k2': 2}"),
            ({
                u'key2': [{
                    'k2': 2
                }, {
                    'k1': 1
                }],
                'key1': {
                    'k': 3
                }
            }, "key1,{'k': 3}\nkey2,{'k1': 1},{'k2': 2}"),
            ({
                u'key2': [{
                    'k2': 2L
                }, {
                    'k1': 1L
                }],
                'key1': {
                    'k': 9000111222333444555666777888999000L
                }
            },
             "key1,{'k': 9000111222333444555666777888999000}\nkey2,{'k1': 1},{'k2': 2}"
             ),
            # proper sorting of multiple items in subdicts
            ({
                'key1': {
                    'c': 2,
                    u'a': 3L,
                    u'b': 1L
                },
                'key2': [{
                    'c': 2,
                    u'a': 3L,
                    u'b': 1L
                }, {
                    'd': 3,
                    u'a': 2L,
                    u'b': 1L
                }]
            }, "key1,{'a': 3, 'b': 1, 'c': 2}\n" +
             "key2,{'a': 2, 'b': 1, 'd': 3},{'a': 3, 'b': 1, 'c': 2}"),
        ]

        # Local RecordDict subclass for this test: 'key1' and 'key2' are
        # added to the optional keys and their `adjust_*` attributes are
        # set to None (presumably so that the sample values are accepted
        # without any adjustment -- TODO confirm against RecordDict).
        class _RecordDict(RecordDict):
            adjust_key1 = adjust_key2 = None
            optional_keys = RecordDict.optional_keys | {'key1', 'key2'}

        # `__new__` gives a parser instance without `__init__` being run.
        parser = BaseParser.__new__(BaseParser)
        for input_dict, expected_base in inputs_and_resultant_hash_bases:
            record_dict = _RecordDict(input_dict)
            # the expected id is the MD5 hex digest of the hash base
            expected_result = hashlib.md5(expected_base).hexdigest()
            result = parser.get_output_message_id(record_dict)
            self.assertIsInstance(result, str)
            self.assertEqual(result, expected_result)

    def test__get_output_message_id__errors(self):
        # Each `(record content, exception class)` pair below describes
        # content for which get_output_message_id() is expected to raise
        # the given exception.

        class _RecordDict(RecordDict):
            adjust_key1 = adjust_key2 = None
            optional_keys = RecordDict.optional_keys | {'key1', 'key2'}

        bad_inputs_and_expected_errors = [
            # bad subdict key type
            ({'key1': {32: 2}}, TypeError),
            ({'key1': [{32: 2}]}, TypeError),

            # bad subdict value type
            ({'key1': {'k': 2.3}}, TypeError),
            ({'key1': [{'k': 2.3}]}, TypeError),
            ({'key1': {'k': {'k': 2}}}, TypeError),    # nesting is illegal
            ({'key1': [{'k': {'k': 2}}]}, TypeError),  # nesting is illegal

            # bad value type
            ({'key1': 2.3}, TypeError),
            ({'key1': [2.3]}, TypeError),
            ({'key1': [[2]]}, TypeError),              # nesting is illegal
        ]

        # `__new__` gives a parser instance without `__init__` being run.
        parser = BaseParser.__new__(BaseParser)
        for bad_input, expected_error in bad_inputs_and_expected_errors:
            record_dict = _RecordDict(bad_input)
            with self.assertRaises(expected_error):
                parser.get_output_message_id(record_dict)

    def test__postprocess_parsed__without__do_not_resolve_fqdn_to_ip(self):
        # No '_do_not_resolve_fqdn_to_ip' key in the input data: the
        # record is expected to stay empty.
        record = RecordDict()
        self.meth.postprocess_parsed({}, record, 1, item_no=1)
        self.assertEqual(record, {})

    def test__postprocess_parsed__with__do_not_resolve_fqdn_to_ip__False(self):
        # '_do_not_resolve_fqdn_to_ip' present but False: the record is
        # expected to stay empty.
        record = RecordDict()
        input_data = {'_do_not_resolve_fqdn_to_ip': False}
        self.meth.postprocess_parsed(input_data, record, 1, item_no=1)
        self.assertEqual(record, {})

    def test__postprocess_parsed__with__do_not_resolve_fqdn_to_ip__True(self):
        # '_do_not_resolve_fqdn_to_ip' set to True: the flag is expected
        # to be copied into the record.
        record = RecordDict()
        input_data = {'_do_not_resolve_fqdn_to_ip': True}
        self.meth.postprocess_parsed(input_data, record, 1, item_no=1)
        expected_content = {'_do_not_resolve_fqdn_to_ip': True}
        self.assertEqual(record, expected_content)
예제 #6
0
 def _get_mocked_config(self):
     """Build a `ConfigSection` holding the collector's `source` option.

     If `self.additional_config_opts` is not None, the section is
     additionally updated with those options.
     """
     config = ConfigSection(
         mock.sentinel.section,
         {'source': self._COLLECTOR_SOURCE})
     extra_opts = self.additional_config_opts
     if extra_opts is None:
         return config
     config.update(extra_opts)
     return config