Example #1
0
 def messages(self, random_schema):
     """Build one CreateMessage per slot: payloads "0".."n-1", positions 1..n."""
     built = []
     for index in range(self.number_of_messages):
         built.append(
             CreateMessage(random_schema.schema_id,
                           payload=str(index),
                           upstream_position_info={'position': index + 1}))
     return built
Example #2
0
 def secondary_messages(self, secondary_random_schema):
     """Messages on the secondary schema; i runs -2..0, so positions run -1..1."""
     schema_id = secondary_random_schema.schema_id
     result = []
     for offset in range(-2, 1):
         result.append(
             CreateMessage(schema_id,
                           payload=str(offset),
                           upstream_position_info={'position': offset + 1}))
     return result
Example #3
0
 def _create_message(schema, timeslot):
     """Build a CreateMessage stamped relative to the producer monitor start.

     NOTE(review): ``producer``, ``payload`` and ``self`` are not parameters —
     this reads like a closure defined inside an enclosing test method that
     binds them; confirm where this function is declared.
     """
     monitor_start_time = producer.monitor.start_time
     return CreateMessage(
         schema.schema_id,
         payload=payload,
         timestamp=int(self.get_timestamp(monitor_start_time,
                                          timeslot)))
Example #4
0
 def test_producer_registration_message_on_exit(self, producer_instance):
     """Exiting the producer context must stop the registrar exactly once."""
     producer = producer_instance.__enter__()
     with attach_spy_on_func(producer.registrar, 'stop') as func_spy:
         message = CreateMessage(schema_id=1, payload=bytes("Test message"))
         producer.publish(message)
         producer.__exit__(None, None, None)
         assert func_spy.call_count == 1
    def _setup_handle_data_create_event_to_publish_call(
            self, producer, stats_counter, test_table, test_topic,
            first_test_kafka_offset, second_test_kafka_offset,
            data_event_handler, data_create_events, schema_wrapper_entry,
            patches, patch_get_payload_schema, position):
        """Feed each create event through the handler and assert the producer
        published exactly one matching CreateMessage per event.

        NOTE(review): the ``position`` parameter is shadowed — the loop
        rebuilds a fresh LogPosition each iteration, so the argument is never
        read. Several other parameters (stats_counter, test_table, the kafka
        offsets, patches) are also unused in this body; presumably retained
        for fixture-signature symmetry with sibling helpers — confirm.
        """
        expected_call_args = []
        for data_event in data_create_events:
            # Fixed binlog position: every event reports the same upstream spot.
            position = LogPosition(log_file='binlog', log_pos=100)
            upstream_position_info = {
                "position": position.to_dict(),
                "cluster_name": "yelp_main",
                "database_name": "fake_database",
                "table_name": "fake_table"
            }
            data_event_handler.handle_event(data_event, position)
            # Mirror the message the handler is expected to have published.
            expected_call_args.append(
                CreateMessage(payload_data=data_event.row["values"],
                              schema_id=schema_wrapper_entry.schema_id,
                              upstream_position_info=upstream_position_info,
                              keys=(u'primary_key', ),
                              timestamp=data_event.timestamp))
        # First positional arg of each publish(...) call is the message.
        actual_call_args = [i[0][0] for i in producer.publish.call_args_list]
        self._assert_messages_as_expected(expected_call_args, actual_call_args)

        assert producer.publish.call_count == len(data_create_events)
Example #6
0
 def test_publish_pii_message(self, pii_schema, payload, producer_instance):
     """A PII message publishes cleanly when PII skipping is disabled."""
     config = reconfigure(encryption_type='AES_MODE_CBC-1',
                          skip_messages_with_pii=False)
     with config, producer_instance as producer:
         message = CreateMessage(schema_id=pii_schema.schema_id,
                                 payload=payload)
         self._publish_and_assert_pii_message(message, producer)
     # All worker subprocesses must have been reaped on context exit.
     assert len(multiprocessing.active_children()) == 0
Example #7
0
 def test_producer_periodic_registration_messages(self, producer_instance):
     """Registrar re-publishes registration messages on its periodic timer.

     Note: this fails when the threshold is set significantly below one
     second, presumably because of threading granularity. Irrelevant as
     long as the registrar threshold stays well above that.
     """
     producer_instance.registrar.threshold = 1
     with producer_instance as producer:
         writer = producer.registrar.clog_writer
         with attach_spy_on_func(writer, 'publish') as func_spy:
             first = CreateMessage(schema_id=1,
                                   payload=bytes("FAKE MESSAGE"))
             second = CreateMessage(schema_id=2,
                                    payload=bytes("DIFFERENT FAKE MESSAGE"))
             producer.publish(first)
             producer.publish(second)
             # Two schemas x two timer ticks inside 2.5s => four publishes.
             time.sleep(2.5)
             assert func_spy.call_count == 4
Example #8
0
    def topic_offsets(self, request, producer, random_schema, containers):
        """Return the checkpoint offset map; empty when the topic is fresh."""
        if request.param:
            # Fresh topic: create it and report no prior offsets.
            containers.create_kafka_topic(str(random_schema.topic.name))
            return {}
        # Seed the pre-existing topic with one message so an offset exists.
        seed = CreateMessage(random_schema.schema_id, payload=str('-1'))
        producer.publish(seed)
        producer.flush()
        checkpoint = producer.get_checkpoint_position_data()
        return checkpoint.topic_to_kafka_offset_map
Example #9
0
 def _setup_new_topic_and_publish_message_helper(self, schematizer_client,
                                                 publish_messages, schema,
                                                 payload_data, namespace,
                                                 source, message_count):
     """Register a (non-compatible) schema, publish message_count copies of a
     message built from payload_data, and return that message."""
     registered = schematizer_client.register_schema(
         namespace=namespace,
         source=source,
         schema_str=schema,
         source_owner_email='*****@*****.**',
         contains_pii=False)
     new_message = CreateMessage(schema_id=registered.schema_id,
                                 payload_data=payload_data)
     publish_messages(new_message, count=message_count)
     return new_message
Example #10
0
    def test_create_from_offset_and_message_with_no_reader_schema_specified(
            self, registered_schema, payload, example_payload_data):
        """Without a reader schema, the writer schema doubles as the reader."""
        original = CreateMessage(schema_id=registered_schema.schema_id,
                                 payload=payload,
                                 timestamp=1500)
        packed = create_message(Envelope().pack(original))
        offset_and_message = OffsetAndMessage(0, packed)

        extracted = create_from_offset_and_message(
            offset_and_message=offset_and_message, reader_schema_id=None)
        expected_schema_id = registered_schema.schema_id
        assert extracted.schema_id == expected_schema_id
        assert extracted.topic == registered_schema.topic.name
        # reader_schema_id defaults to the writer's schema id.
        assert extracted.reader_schema_id == expected_schema_id
        assert extracted.payload_data == example_payload_data
Example #11
0
    def test_ensure_messages_published_on_new_topic(self, create_new_schema,
                                                    producer):
        """When a topic doesn't exist, every message for that topic must be
        published (nothing is assumed already delivered)."""
        new_schema = create_new_schema(source='ensure_published_source_two')
        topic = str(new_schema.topic.name)
        message = CreateMessage(new_schema.schema_id, payload=str('1'))

        with attach_spy_on_func(producer, 'publish') as publish_spy:
            producer.ensure_messages_published([message], {})
            assert publish_spy.call_count == 1

        with setup_capture_new_messages_consumer(topic) as consumer:
            consumer.seek(0, 0)  # rewind to the head of the topic
            self._assert_all_messages_published(consumer,
                                                expected_payloads=[1])
Example #12
0
    def test_skip_publish_pii_message(self, pii_schema, payload,
                                      producer_instance):
        """With skip_messages_with_pii on, PII messages are dropped and the
        skip is logged with the message's identifying fields."""
        with reconfigure(encryption_type='AES_MODE_CBC-1',
                         skip_messages_with_pii=True):
            with producer_instance as producer:
                with mock.patch.object(data_pipeline._kafka_producer,
                                       'logger') as mock_logger:
                    pii_message = CreateMessage(
                        schema_id=pii_schema.schema_id, payload=payload)
                    messages = self._publish_message(pii_message, producer)

        assert len(messages) == 0
        assert len(multiprocessing.active_children()) == 0
        expected_log = ("Skipping a PII message - uuid hex: {}, schema_id: {}, "
                        "timestamp: {}, type: {}").format(
                            pii_message.uuid_hex, pii_message.schema_id,
                            pii_message.timestamp,
                            pii_message.message_type.name)
        assert mock_logger.info.call_args_list[0] == mock.call(expected_log)
Example #13
0
    def test_publish_to_new_topic(self, create_new_schema, producer):
        """Publishing to a not-yet-created topic retries until it succeeds,
        and the offset map ends up reflecting the single published message."""
        new_schema = create_new_schema(source='retry_source')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))

        kafka_client = producer._kafka_producer.kafka_client
        with attach_spy_on_func(kafka_client,
                                'send_produce_request') as send_request_spy:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            send_request_spy.reset()

            producer.publish(message)
            producer.flush()

            # it should fail at least the 1st time because the topic doesn't
            # exist. Depending on how fast the topic is created, it could retry
            # more than 2 times.
            assert send_request_spy.call_count >= 2

        published = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=published)
        self.assert_new_topic_to_offset_map(producer,
                                            message.topic,
                                            orig_topic_to_offset_map,
                                            published_message_count=1)
Example #14
0
 def current_message(self, registered_auto_refresh_schema, payload):
     """Message fixture bound to the auto-refresh schema."""
     schema_id = registered_auto_refresh_schema.schema_id
     return CreateMessage(schema_id=schema_id, payload=payload)
Example #15
0
 def create_message_with_payload_data(self):
     """Build a message from the factory schema and its canned payload data."""
     schema_json = SchemaFactory.get_schema_json()
     return CreateMessage(schema_id=schema_json.schema_id,
                          payload_data=SchemaFactory.get_payload_data())
Example #16
0
 def message(self, registered_schema):
     """Message with a fixed timestamp and a known-good payload field."""
     data = {'good_field': 100}
     return CreateMessage(registered_schema.schema_id,
                          payload_data=data,
                          timestamp=1500)
Example #17
0
def message(registered_schema, payload):
    """Plain message fixture pairing the registered schema with raw payload."""
    return CreateMessage(
        schema_id=registered_schema.schema_id,
        payload=payload,
    )
Example #18
0
def payload_data_message(registered_schema, example_payload_data):
    """Message fixture built from decoded payload data rather than raw bytes."""
    return CreateMessage(
        schema_id=registered_schema.schema_id,
        payload_data=example_payload_data,
    )
Example #19
0
def message_with_pkeys(registered_schema_with_pkey,
                       example_payload_with_pkeys):
    """Message fixture for a schema that declares primary keys."""
    schema_id = registered_schema_with_pkey.schema_id
    return CreateMessage(schema_id=schema_id,
                         payload=example_payload_with_pkeys)
Example #20
0
 def _create_message(**overrides):
     """Message over the enclosing schema/payload fixtures; extra keyword
     arguments are forwarded to CreateMessage verbatim."""
     return CreateMessage(
         schema_id=registered_schema.schema_id,
         payload=payload,
         **overrides
     )
Example #21
0
 def create_message(schema_id, payload_data):
     """Build and return a CreateMessage for the given schema and payload.

     Fix: the original constructed the message but never returned it, so
     every caller received ``None`` — a factory named ``create_message``
     is expected to hand back the created object (as every sibling helper
     in this file does).
     """
     return CreateMessage(schema_id=schema_id, payload_data=payload_data)
Example #22
0
 def _create_message(self, schema, payload, **kwargs):
     """Message helper with a fixed timestamp of 1500; extra keyword
     arguments pass straight through to CreateMessage."""
     return CreateMessage(
         schema_id=schema.schema_id,
         timestamp=1500,
         payload=payload,
         **kwargs
     )
Example #23
0
 def input_compatible_message(self, registered_compatible_schema,
                              compatible_payload_data):
     """Message fixture using the compatible schema and its matching payload."""
     schema_id = registered_compatible_schema.schema_id
     return CreateMessage(schema_id=schema_id,
                          payload_data=compatible_payload_data)
Example #24
0
 def expected_message(self, registered_schema, payload):
     """The message consumers are expected to see for this schema/payload."""
     return CreateMessage(
         schema_id=registered_schema.schema_id,
         payload=payload,
     )
Example #25
0
 def another_message(self, another_schema, payload):
     """A second message fixture, bound to the alternate schema."""
     schema_id = another_schema.schema_id
     return CreateMessage(schema_id, payload=payload)
Example #26
0
 def pii_message(self, pii_schema, payload):
     """Message fixture built from the PII schema."""
     return CreateMessage(
         schema_id=pii_schema.schema_id,
         payload=payload,
     )
Example #27
0
 def _create_message(self, **kwargs):
     """Build a CreateMessage from valid_message_data with per-call overrides.

     Fix: the original called ``.update(kwargs)`` directly on
     ``self.valid_message_data``, mutating it in place — if that attribute
     is a shared dict, overrides from one call leak into every later call.
     Copying first keeps each call independent; behavior is otherwise
     unchanged (callers only see the returned message).
     """
     message_data = dict(self.valid_message_data)
     message_data.update(kwargs)
     return CreateMessage(**message_data)