def messages(self, random_schema):
    """Fixture: self.number_of_messages CreateMessages against random_schema.

    Payload is the stringified index; upstream position is index + 1.
    """
    built = []
    for index in range(self.number_of_messages):
        built.append(
            CreateMessage(
                random_schema.schema_id,
                payload=str(index),
                upstream_position_info={'position': index + 1}
            )
        )
    return built
def secondary_messages(self, secondary_random_schema):
    """Fixture: three messages (indices -2..0) against the secondary schema."""
    schema_id = secondary_random_schema.schema_id
    return [
        CreateMessage(
            schema_id,
            payload=str(offset),
            upstream_position_info={'position': offset + 1}
        )
        for offset in range(-2, 1)
    ]
def _create_message(schema, timeslot):
    # NOTE(review): `producer`, `self`, and `payload` are free variables here —
    # this reads like a closure nested inside a test method; confirm in context.
    start_time = producer.monitor.start_time
    message_timestamp = int(self.get_timestamp(start_time, timeslot))
    return CreateMessage(
        schema.schema_id,
        payload=payload,
        timestamp=message_timestamp
    )
def test_producer_registration_message_on_exit(self, producer_instance):
    """Exiting the producer context must stop the registrar exactly once."""
    # Enter manually so __exit__ can be triggered while the spy is attached.
    producer = producer_instance.__enter__()
    with attach_spy_on_func(producer.registrar, 'stop') as stop_spy:
        producer.publish(
            CreateMessage(schema_id=1, payload=bytes("Test message")))
        producer.__exit__(None, None, None)
        assert stop_spy.call_count == 1
def _setup_handle_data_create_event_to_publish_call(
        self, producer, stats_counter, test_table, test_topic,
        first_test_kafka_offset, second_test_kafka_offset,
        data_event_handler, data_create_events, schema_wrapper_entry,
        patches, patch_get_payload_schema, position):
    """Feed each create event through the handler, then assert the producer
    published exactly one matching CreateMessage per event.

    NOTE(review): the `position` parameter is shadowed by the LogPosition
    built inside the loop, so the value passed in is never used — confirm
    that is intentional.
    """
    expected_call_args = []
    for data_event in data_create_events:
        # Fixed fake binlog position used for every event.
        position = LogPosition(log_file='binlog', log_pos=100)
        upstream_position_info = {
            "position": position.to_dict(),
            "cluster_name": "yelp_main",
            "database_name": "fake_database",
            "table_name": "fake_table"
        }
        data_event_handler.handle_event(data_event, position)
        # Mirror the message the handler is expected to have produced.
        expected_call_args.append(
            CreateMessage(
                payload_data=data_event.row["values"],
                schema_id=schema_wrapper_entry.schema_id,
                upstream_position_info=upstream_position_info,
                keys=(u'primary_key', ),
                timestamp=data_event.timestamp))
    # First positional arg of each producer.publish(...) call.
    actual_call_args = [i[0][0] for i in producer.publish.call_args_list]
    self._assert_messages_as_expected(expected_call_args, actual_call_args)
    assert producer.publish.call_count == len(data_create_events)
def test_publish_pii_message(self, pii_schema, payload, producer_instance):
    """A PII message publishes successfully when PII skipping is disabled."""
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=False
    ), producer_instance as producer:
        message = CreateMessage(schema_id=pii_schema.schema_id, payload=payload)
        self._publish_and_assert_pii_message(message, producer)
    # Every producer worker process must be gone after the context exits.
    assert len(multiprocessing.active_children()) == 0
def test_producer_periodic_registration_messages(self, producer_instance):
    """Two schemas published, then a 2.5s wait with a 1s threshold should
    yield four periodic registration publishes (two per schema).

    Note: this test fails when the threshold is set significantly below
    1 second, presumably because of the nature of threading. Should be
    irrelevant as long as the registrar threshold is set significantly higher.
    """
    producer_instance.registrar.threshold = 1
    with producer_instance as producer:
        spy_target = producer.registrar.clog_writer
        with attach_spy_on_func(spy_target, 'publish') as publish_spy:
            producer.publish(
                CreateMessage(schema_id=1, payload=bytes("FAKE MESSAGE")))
            producer.publish(
                CreateMessage(schema_id=2,
                              payload=bytes("DIFFERENT FAKE MESSAGE")))
            time.sleep(2.5)
            assert publish_spy.call_count == 4
def topic_offsets(self, request, producer, random_schema, containers):
    """Fixture: empty offset map for a fresh topic, otherwise the
    checkpointed kafka offsets after publishing one seed message."""
    is_fresh_topic = request.param
    if is_fresh_topic:
        containers.create_kafka_topic(str(random_schema.topic.name))
        return {}
    seed_message = CreateMessage(random_schema.schema_id, payload=str('-1'))
    producer.publish(seed_message)
    producer.flush()
    checkpoint = producer.get_checkpoint_position_data()
    return checkpoint.topic_to_kafka_offset_map
def _setup_new_topic_and_publish_message_helper(
    self, schematizer_client, publish_messages, schema, payload_data,
    namespace, source, message_count
):
    """Register `schema` under namespace/source, publish `message_count`
    copies of a message built from `payload_data`, and return that message.
    """
    registered = schematizer_client.register_schema(
        namespace=namespace,
        source=source,
        schema_str=schema,
        source_owner_email='*****@*****.**',
        contains_pii=False
    )
    new_message = CreateMessage(
        schema_id=registered.schema_id,
        payload_data=payload_data
    )
    publish_messages(new_message, count=message_count)
    return new_message
def test_create_from_offset_and_message_with_no_reader_schema_specified(
    self, registered_schema, payload, example_payload_data
):
    """Without an explicit reader schema the writer schema id is reused."""
    original = CreateMessage(
        schema_id=registered_schema.schema_id,
        payload=payload,
        timestamp=1500,
    )
    packed = Envelope().pack(original)
    offset_and_message = OffsetAndMessage(0, create_message(packed))
    extracted = create_from_offset_and_message(
        offset_and_message=offset_and_message,
        reader_schema_id=None
    )
    assert extracted.schema_id == registered_schema.schema_id
    assert extracted.topic == registered_schema.topic.name
    assert extracted.reader_schema_id == registered_schema.schema_id
    assert extracted.payload_data == example_payload_data
def test_ensure_messages_published_on_new_topic(self, create_new_schema, producer):
    """When a topic doesn't exist, all of the messages on that topic should
    be published.
    """
    new_schema = create_new_schema(source='ensure_published_source_two')
    topic_name = str(new_schema.topic.name)
    message = CreateMessage(new_schema.schema_id, payload=str('1'))
    with attach_spy_on_func(producer, 'publish') as publish_spy:
        producer.ensure_messages_published([message], {})
        assert publish_spy.call_count == 1
        with setup_capture_new_messages_consumer(topic_name) as consumer:
            consumer.seek(0, 0)  # rewind to the head of the topic
            self._assert_all_messages_published(consumer, expected_payloads=[1])
def test_skip_publish_pii_message(self, pii_schema, payload, producer_instance):
    """With skip_messages_with_pii enabled, a PII message is dropped and a
    skip notice is logged instead of publishing."""
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=True
    ), producer_instance as producer, mock.patch.object(
        data_pipeline._kafka_producer,
        'logger'
    ) as mock_logger:
        pii_message = CreateMessage(schema_id=pii_schema.schema_id,
                                    payload=payload)
        published = self._publish_message(pii_message, producer)
        assert len(published) == 0
    # Producer workers shut down cleanly even though nothing was published.
    assert len(multiprocessing.active_children()) == 0
    expected_log = ("Skipping a PII message - uuid hex: {}, schema_id: {}, "
                    "timestamp: {}, type: {}").format(
        pii_message.uuid_hex,
        pii_message.schema_id,
        pii_message.timestamp,
        pii_message.message_type.name)
    assert mock_logger.info.call_args_list[0] == mock.call(expected_log)
def test_publish_to_new_topic(self, create_new_schema, producer):
    """Publishing to a not-yet-created topic retries until Kafka creates it,
    then the message lands and the offset map picks up the new topic."""
    new_schema = create_new_schema(source='retry_source')
    message = CreateMessage(new_schema.schema_id, payload=str('1'))
    with attach_spy_on_func(
        producer._kafka_producer.kafka_client,
        'send_produce_request'
    ) as send_request_spy:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        send_request_spy.reset()
        producer.publish(message)
        producer.flush()
        # it should fail at least the 1st time because the topic doesn't
        # exist. Depending on how fast the topic is created, it could retry
        # more than 2 times.
        assert send_request_spy.call_count >= 2
        actual = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=actual)
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )
def current_message(self, registered_auto_refresh_schema, payload):
    """Fixture: message built against the auto-refresh schema."""
    schema_id = registered_auto_refresh_schema.schema_id
    return CreateMessage(schema_id=schema_id, payload=payload)
def create_message_with_payload_data(self):
    """Fixture: message built from the SchemaFactory schema and payload."""
    factory_schema = SchemaFactory.get_schema_json()
    return CreateMessage(
        schema_id=factory_schema.schema_id,
        payload_data=SchemaFactory.get_payload_data()
    )
def message(self, registered_schema):
    """Fixture: payload-data message with a fixed timestamp."""
    payload_fields = {'good_field': 100}
    return CreateMessage(
        registered_schema.schema_id,
        payload_data=payload_fields,
        timestamp=1500
    )
def message(registered_schema, payload):
    """Fixture: basic message pairing the registered schema with `payload`."""
    schema_id = registered_schema.schema_id
    return CreateMessage(schema_id=schema_id, payload=payload)
def payload_data_message(registered_schema, example_payload_data):
    """Fixture: message carrying payload_data rather than packed payload."""
    schema_id = registered_schema.schema_id
    return CreateMessage(schema_id=schema_id,
                         payload_data=example_payload_data)
def message_with_pkeys(registered_schema_with_pkey, example_payload_with_pkeys):
    """Fixture: message against a schema that declares primary keys."""
    schema_id = registered_schema_with_pkey.schema_id
    return CreateMessage(schema_id=schema_id,
                         payload=example_payload_with_pkeys)
def _create_message(**overrides):
    # Closure helper: `registered_schema` and `payload` come from the
    # enclosing scope; extra kwargs are forwarded verbatim to CreateMessage.
    return CreateMessage(
        schema_id=registered_schema.schema_id,
        payload=payload,
        **overrides
    )
def create_message(schema_id, payload_data):
    """Build and return a CreateMessage for the given schema id and payload.

    Bug fix: the original constructed the message but never returned it,
    so every caller received None.
    """
    return CreateMessage(schema_id=schema_id, payload_data=payload_data)
def _create_message(self, schema, payload, **kwargs):
    """Helper: message from `schema`/`payload` with a fixed 1500 timestamp;
    extra kwargs are forwarded to CreateMessage."""
    return CreateMessage(
        schema_id=schema.schema_id,
        payload=payload,
        timestamp=1500,
        **kwargs
    )
def input_compatible_message(self, registered_compatible_schema,
                             compatible_payload_data):
    """Fixture: payload-data message against the compatible schema."""
    schema_id = registered_compatible_schema.schema_id
    return CreateMessage(schema_id=schema_id,
                         payload_data=compatible_payload_data)
def expected_message(self, registered_schema, payload):
    """Fixture: the message a test expects to see published."""
    schema_id = registered_schema.schema_id
    return CreateMessage(schema_id=schema_id, payload=payload)
def another_message(self, another_schema, payload):
    """Fixture: a second message built against a different schema."""
    return CreateMessage(
        another_schema.schema_id,
        payload=payload
    )
def pii_message(self, pii_schema, payload):
    """Fixture: message against a schema flagged as containing PII."""
    schema_id = pii_schema.schema_id
    return CreateMessage(schema_id=schema_id, payload=payload)
def _create_message(self, **kwargs):
    """Build a CreateMessage from self.valid_message_data plus overrides.

    Fix: copy the base data before applying overrides. The original called
    update() directly on self.valid_message_data, mutating the shared
    fixture dict so overrides leaked into every subsequent call.
    """
    message_data = dict(self.valid_message_data)
    message_data.update(kwargs)
    return CreateMessage(**message_data)