Ejemplo n.º 1
0
def load_config(
        config_path: str,
        func_dict: Dict[str, Callable]) -> ConsumerProducerServiceConfig:
    """
    Build the consumer/producer service configuration from a YAML file.

    :param config_path: the path of the config file to load
    :param func_dict: callables addressable by name from the config; every
        string operation argument that equals one of these names is
        replaced by the mapped callable
    :return: the ConsumerProducerServiceConfig described by the file
    :raises KeyError: if a required config key is missing, or if an
        operation / pipeline entry references a name that was not declared
    """
    with open(config_path, "r") as yaml_file:
        # NOTE(review): which Loader is bound here is not visible from this
        # function; confirm it is a safe loader if configs may be untrusted.
        config_dict = load(yaml_file, Loader=Loader)

    rabbit_params = config_dict['rabbit_params']
    host = rabbit_params['host']
    consume_from = rabbit_params['consume_from']
    produce_to = rabbit_params['produce_to']
    messages_to_group = rabbit_params['messages_to_group']

    # Optional section: a missing key or an empty (null) value both mean
    # "no sharding" instead of failing on ``**None``.
    publisher_sharding = None
    sharding_params = config_dict.get('publisher_sharding')
    if sharding_params is not None:
        publisher_sharding = PublisherSharding(**sharding_params)

    # Aggregates are declared first so GroupBy operations can refer to them
    # by name.
    group_aggregates = {}
    for group_aggregate in config_dict['group_aggregates']:
        aggregate = GroupAggregate.factory(group_aggregate['type'],
                                           **group_aggregate['args'])
        group_aggregates[group_aggregate['name']] = aggregate

    operations = {}
    for operation in config_dict['operations']:
        # Resolve into a fresh dict rather than rewriting the parsed config
        # while iterating over it.
        op_args = {
            arg_name: (func_dict[arg_value]
                       if isinstance(arg_value, str) and arg_value in func_dict
                       else arg_value)
            for arg_name, arg_value in operation['args'].items()
        }
        if operation['type'] == 'GroupBy':
            # GroupBy lists its aggregates by name; swap in the instances.
            op_args['aggregates'] = [
                group_aggregates[agg_name]
                for agg_name in op_args['aggregates']
            ]
        operations[operation['name']] = Operation.factory(
            operation['type'], **op_args)

    pipeline_operations = [
        operations[op_name] for op_name in config_dict['message_pipeline']
    ]
    # Optional section: absent or empty means no extra keyword arguments.
    pipeline_kwargs = config_dict.get('message_pipeline_kwargs') or {}
    message_pipeline = MessagePipeline(pipeline_operations, **pipeline_kwargs)

    return ConsumerProducerServiceConfig(host=host,
                                         consume_from=consume_from,
                                         produce_to=produce_to,
                                         messages_to_group=messages_to_group,
                                         message_pipeline=message_pipeline,
                                         publisher_sharding=publisher_sharding)
Ejemplo n.º 2
0
 def test_pipeline_ends(self):
     """Check how an operation-less pipeline counts window-end messages.

     With ``ends_to_receive=3`` the first two ends are expected to yield
     ``([], False)`` and the third a single broadcast end with ``True``.
     With ``ends_to_receive=1, ends_to_send=3`` one end is expected to
     yield three broadcast ends with ``True``.
     """
     # Three ends in, one end out.
     fan_in = MessagePipeline([], ends_to_receive=3, ends_to_send=1)
     for _ in range(2):
         self.assertEqual(fan_in.process(WINDOW_END_MESSAGE), ([], False))
     third_result = fan_in.process(WINDOW_END_MESSAGE)
     self.assertEqual(third_result,
                      ([BroadcastMessage(WINDOW_END_MESSAGE)], True))

     # One end in, three ends out.
     fan_out = MessagePipeline([], ends_to_receive=1, ends_to_send=3)
     fan_out_result = fan_out.process(WINDOW_END_MESSAGE)
     expected_ends = [BroadcastMessage(WINDOW_END_MESSAGE) for _ in range(3)]
     self.assertEqual(fan_out_result, (expected_ends, True))
Ejemplo n.º 3
0
 def _factory_cp(self, ops_args, process_args, delete_stuff=True):
     """Assemble a RabbitQueueConsumerProducer driven by a MessagePipeline.

     :param ops_args: forwarded to ``self._factory_operation_list``
     :param process_args: four items unpacked as (pipeline kwargs,
         consumer/producer kwargs, message-set positional args, sharding
         positional args); falsy message-set or sharding args leave that
         component as ``None``
     :param delete_stuff: forwarded to ``self._setup_message_set`` and
         ``self._self_register_dir``
     :return: the constructed RabbitQueueConsumerProducer
     """
     pipe_kwargs, cp_kwargs, message_set_args, sharding_args = process_args
     op_list = self._factory_operation_list(ops_args)

     idempotency_set = (self._setup_message_set(*message_set_args,
                                                delete_stuff=delete_stuff)
                        if message_set_args else None)
     sharding = PublisherSharding(*sharding_args) if sharding_args else None

     if 'data_path' in pipe_kwargs:
         self._self_register_dir(pipe_kwargs['data_path'], delete_stuff)

     # A falsy operation list is replaced by an empty one.
     pipeline = MessagePipeline(**pipe_kwargs,
                                operations=op_list or [],
                                idempotency_set=idempotency_set)
     return RabbitQueueConsumerProducer(**cp_kwargs,
                                        callable_commiter=pipeline,
                                        publisher_sharding=sharding)
Ejemplo n.º 4
0
 def test_complex_pipeline_multiple_ends(self):
     """Run a Filter -> GroupBy -> Filter pipeline that waits for two ends.

     Twelve records are processed and each is expected to return
     ``([], False)``, as is the first window end. The second window end
     is expected to return the aggregated rows for keys "B" and "C",
     followed by two broadcast ends and ``True``.
     """
     drop_z = Operation.factory("Filter", "key", lambda x: x != "Z")
     aggregates = [
         GroupAggregate.factory("Count"),
         GroupAggregate.factory("Sum", "value"),
         GroupAggregate.factory("ValueUnique", "comment"),
         GroupAggregate.factory("Mean", "time"),
     ]
     group_by_key = Operation.factory("GroupBy",
                                      group_by="key",
                                      aggregates=aggregates)
     keep_large_sums = Operation.factory("Filter", "value_sum",
                                         lambda x: x > 5)
     pipe = MessagePipeline([drop_z, group_by_key, keep_large_sums],
                            ends_to_receive=2,
                            ends_to_send=2)

     # (key, value, comment, time) for each record, in processing order.
     records = (
         ("A", 2, "test", 0.2),
         ("Z", 2, "test", 0.2),
         ("A", 1, "test", -0.2),
         ("Z", 1, "test", -0.2),
         ("A", 0, "test", 0.1),
         ("Z", 0, "test", 0.1),
         ("A", 0, "test", -0.1),
         ("B", 2, "test", 0.0),
         ("B", 2, "test", 0.0),
         ("B", 2, "test2", 0.5),
         ("B", 2, "test3", 0.5),
         ("C", 7, "test", 0.0),
     )
     for key, value, comment, time_value in records:
         message = {
             "key": key,
             "value": value,
             "comment": comment,
             "time": time_value
         }
         # Nothing is expected downstream before the window closes.
         self.assertEqual(pipe.process(message), ([], False))

     # First of the two required ends: still nothing expected.
     self.assertEqual(pipe.process(WINDOW_END_MESSAGE), ([], False))

     final_result = pipe.process(WINDOW_END_MESSAGE)
     expected_rows = [
         {
             "key": "B",
             "count": 4,
             "value_sum": 8,
             "comment_is_unique": False,
             "time_mean": 0.25
         },
         {
             "key": "C",
             "count": 1,
             "value_sum": 7,
             "comment_is_unique": True,
             "time_mean": 0.0
         },
     ]
     expected_ends = [BroadcastMessage(WINDOW_END_MESSAGE) for _ in range(2)]
     self.assertEqual(final_result, (expected_rows + expected_ends, True))
Ejemplo n.º 5
0
    def _setup_pipelineC(self):
        """Declare the queues and start the processes of pipeline C.

        Stages, in the order they are wired below:

        1. ``pipeline_start`` -> ``pipelineC_step1`` through an
           operation-less pipeline, published with
           ``PublisherSharding(by_key='key', shards=3)`` (presumably
           landing on the ``pipelineC_step1_shardN`` queues declared
           here; confirm against the publisher).
        2. One GroupBy/Count consumer per shard queue, all producing to
           ``pipelineC_step2``.
        3. A Filter on ``count`` that waits for 3 ends and produces to
           ``pipelineC_result``.
        """
        # Stage 1: sharded entry point.
        entry_set = self._setup_message_set('/tmp/message_set1')
        entry_pipe = MessagePipeline([],
                                     ends_to_receive=1,
                                     ends_to_send=1,
                                     idempotency_set=entry_set)
        for shard_queue in ('pipelineC_step1_shard0',
                            'pipelineC_step1_shard1',
                            'pipelineC_step1_shard2'):
            self._setup_queue(shard_queue)
        entry_cp = RabbitQueueConsumerProducer(
            "localhost",
            'pipeline_start', ['pipelineC_step1'],
            entry_pipe,
            messages_to_group=DEFAULT_MESSAGES_TO_GROUP,
            publisher_sharding=PublisherSharding(by_key='key', shards=3))
        self._setup_start_process(entry_cp)

        # Stage 2: receive sharded messages and count, one worker per shard.
        self._setup_queue('pipelineC_step2')
        shard_workers = (
            ('pipelineC_step1_shard0', '/tmp/message_set2'),
            ('pipelineC_step1_shard1', '/tmp/message_set3'),
            ('pipelineC_step1_shard2', '/tmp/message_set4'),
        )
        for shard_queue, set_path in shard_workers:
            shard_set = self._setup_message_set(set_path)
            count_by_key = Operation.factory(
                "GroupBy",
                group_by="key",
                aggregates=[GroupAggregate.factory("Count")])
            shard_pipe = MessagePipeline([count_by_key],
                                         ends_to_receive=1,
                                         ends_to_send=1,
                                         idempotency_set=shard_set)
            shard_cp = RabbitQueueConsumerProducer(
                "localhost",
                shard_queue, ['pipelineC_step2'],
                shard_pipe,
                messages_to_group=DEFAULT_MESSAGES_TO_GROUP)
            self._setup_start_process(shard_cp)

        # Stage 3: filter.
        self._setup_queue('pipelineC_result')
        filter_set = self._setup_message_set('/tmp/message_set5')
        keep_frequent = Operation.factory("Filter", "count", lambda x: x > 2)
        filter_pipe = MessagePipeline([keep_frequent],
                                      ends_to_receive=3,
                                      ends_to_send=1,
                                      idempotency_set=filter_set)
        filter_cp = RabbitQueueConsumerProducer(
            "localhost",
            'pipelineC_step2',
            ['pipelineC_result'],
            filter_pipe,
            messages_to_group=DEFAULT_MESSAGES_TO_GROUP,
        )
        self._setup_start_process(filter_cp)
Ejemplo n.º 6
0
    def _setup_pipelineA(self):
        """Declare the queues and start the processes of pipeline A.

        Stages, in the order they are wired below (every pipeline is
        built with ``stop_at_window_end=True``):

        1. ``pipeline_start`` -> ``pipelineA_step1_queue1`` through an
           operation-less pipeline that sends 3 ends.
        2. Three GroupBy/Count consumers, all reading
           ``pipelineA_step1_queue1`` and producing to
           ``pipelineA_step2_queue1``.
        3. A GroupBy that sums ``count`` and renames ``count_sum`` to
           ``count``, waiting for 3 ends, producing to
           ``pipelineA_step3_queue1``.
        4. A Filter on ``count`` producing to ``pipelineA_result``.
        """
        # Stage 1: entry point, one end in and three ends out.
        entry_set = self._setup_message_set('/tmp/message_set1')
        entry_pipe = MessagePipeline([],
                                     ends_to_receive=1,
                                     ends_to_send=3,
                                     stop_at_window_end=True,
                                     idempotency_set=entry_set)
        self._setup_queue('pipelineA_step1_queue1')
        entry_cp = RabbitQueueConsumerProducer(
            "localhost",
            'pipeline_start', ['pipelineA_step1_queue1'],
            entry_pipe,
            messages_to_group=DEFAULT_MESSAGES_TO_GROUP)
        self._setup_start_process(entry_cp)

        # Stage 2: 3 consumers that group and count, sharing one input queue.
        self._setup_queue('pipelineA_step2_queue1')
        for set_path in ('/tmp/message_set2',
                         '/tmp/message_set3',
                         '/tmp/message_set4'):
            counter_set = self._setup_message_set(set_path)
            count_by_key = Operation.factory(
                "GroupBy",
                group_by="key",
                aggregates=[GroupAggregate.factory("Count")])
            counter_pipe = MessagePipeline([count_by_key],
                                           ends_to_receive=1,
                                           ends_to_send=1,
                                           stop_at_window_end=True,
                                           idempotency_set=counter_set)
            counter_cp = RabbitQueueConsumerProducer(
                "localhost",
                'pipelineA_step1_queue1', ['pipelineA_step2_queue1'],
                counter_pipe,
                messages_to_group=DEFAULT_MESSAGES_TO_GROUP)
            self._setup_start_process(counter_cp)

        # Stage 3: receive and sum counts.
        # NOTE(review): this is the only stage built without an
        # idempotency_set - confirm that is intentional.
        self._setup_queue('pipelineA_step3_queue1')
        sum_counts = Operation.factory(
            "GroupBy",
            group_by="key",
            aggregates=[GroupAggregate.factory("Sum", "count")])
        restore_name = Operation.factory('Rename', {"count_sum": "count"})
        merge_pipe = MessagePipeline([sum_counts, restore_name],
                                     ends_to_receive=3,
                                     ends_to_send=1,
                                     stop_at_window_end=True)
        merge_cp = RabbitQueueConsumerProducer(
            "localhost",
            'pipelineA_step2_queue1', ['pipelineA_step3_queue1'],
            merge_pipe,
            messages_to_group=DEFAULT_MESSAGES_TO_GROUP)
        self._setup_start_process(merge_cp)

        # Stage 4: final filter.
        self._setup_queue('pipelineA_result')
        filter_set = self._setup_message_set('/tmp/message_set6')
        keep_frequent = Operation.factory("Filter", "count", lambda x: x > 2)
        filter_pipe = MessagePipeline([keep_frequent],
                                      ends_to_receive=1,
                                      ends_to_send=1,
                                      stop_at_window_end=True,
                                      idempotency_set=filter_set)
        filter_cp = RabbitQueueConsumerProducer(
            "localhost",
            'pipelineA_step3_queue1', ['pipelineA_result'],
            filter_pipe,
            messages_to_group=DEFAULT_MESSAGES_TO_GROUP)
        self._setup_start_process(filter_cp)