def test_avro_serializer_multiple_topic_per_serializer_instance(
        mock_schema_registry):
    """
    Ensures the schema_id is correctly found when the same serializer
    instance is used for multiple topics.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic1 = "test-topic1"
    topic2 = "test-topic2"

    test_serializer = AvroSerializer("string",
                                     test_client,
                                     conf={'auto.register.schemas': False})

    def ensure_id_match(ctx):
        subject = "{}-{}".format(ctx.topic, ctx.field)
        expected_id = find_schema_id(subject)

        payload = test_serializer("test", ctx)
        _, schema_id = unpack('>bI', BytesIO(payload).read(5))
        assert schema_id == expected_id

    ensure_id_match(SerializationContext(topic1, MessageField.KEY))
    ensure_id_match(SerializationContext(topic2, MessageField.VALUE))
    ensure_id_match(SerializationContext(topic1, MessageField.KEY))

    # Ensure lookup_schema was invoked only once per subject
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1
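The 5-byte header unpacked above is the Confluent wire format: a zero "magic"
byte followed by the schema ID as a big-endian 4-byte integer. A minimal
standalone sketch of that framing (helper names here are illustrative, not
part of the test suite):

from io import BytesIO
from struct import pack, unpack

_MAGIC_BYTE = 0


def frame_payload(schema_id, avro_bytes):
    # Prefix the Avro-encoded body with the wire-format header.
    return pack('>bI', _MAGIC_BYTE, schema_id) + avro_bytes


def read_schema_id(payload):
    # Mirror of the test above: one signed magic byte, then the
    # unsigned big-endian schema ID.
    magic, schema_id = unpack('>bI', BytesIO(payload).read(5))
    assert magic == _MAGIC_BYTE
    return schema_id


assert read_schema_id(frame_payload(7, b'avro-body')) == 7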
Example #2
def test_avro_serializer_preload_schema_id(mock_schema_registry):
    """
    Ensures the serializer does not reload the schema ID from the registry
    after the user has forced preloading.
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic1 = "test-topic1"
    topic2 = "test-topic2"

    test_serializer = AvroSerializer("string",
                                     test_client,
                                     conf={'auto.register.schemas': False})

    test_serializer.load_registry_schema_id(
        SerializationContext(topic1, MessageField.KEY))
    test_serializer.load_registry_schema_id(
        SerializationContext(topic2, MessageField.VALUE))

    # Ensure lookup_schema was invoked only once per subject
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1

    test_serializer("test", SerializationContext(topic1, MessageField.KEY))
    test_serializer("test", SerializationContext(topic2, MessageField.VALUE))

    # Ensure we did not query the Schema Registry again
    assert test_client.counter['POST'].get(
        '/subjects/{}-key'.format(topic1)) == 1
    assert test_client.counter['POST'].get(
        '/subjects/{}-value'.format(topic2)) == 1
Example #3
def test_avro_serializer_config_use_latest_version(mock_schema_registry):
    """
    Ensures use.latest.version=True fetches the latest registered schema
    version instead of registering a new one
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic = "test-use-latest-version"
    subject = topic + '-key'

    test_serializer = AvroSerializer(test_client,
                                     'string',
                                     conf={
                                         'auto.register.schemas': False,
                                         'use.latest.version': True
                                     })

    test_serializer(
        "test",
        SerializationContext(topic, MessageField.KEY))

    register_count = test_client.counter['POST'].get(
        '/subjects/{}/versions'.format(subject), 0)
    assert register_count == 0
    # Ensure latest was requested
    assert test_client.counter['GET'].get(
        '/subjects/{}/versions/latest'.format(subject)) == 1
Example #4
    def assert_cb(err, msg):
        actual = value_deserializer(msg.value(),
                                    SerializationContext(topic, MessageField.VALUE, msg.headers()))

        if record_type == "record":
            assert all(actual[k] == v for k, v in data.items())
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data
Example #5
def test_avro_serializer_subject_name_strategy_default(load_avsc):
    """
    Ensures the default subject name strategy returns the correct subject name
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client,
                                     load_avsc('basic_schema.avsc'))

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-value'
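For reference, the default strategy asserted above simply joins the topic name
and the message field; a minimal sketch of the same behavior (not the
library's internal code):

from confluent_kafka.serialization import SerializationContext, MessageField


def default_subject_name(ctx, record_name=None):
    # The default (topic name) strategy ignores the record name:
    # MessageField.KEY / MessageField.VALUE are the strings "key" / "value",
    # so the subject is "<topic>-key" or "<topic>-value".
    return "{}-{}".format(ctx.topic, ctx.field)


assert default_subject_name(
    SerializationContext('test_subj', MessageField.VALUE)) == 'test_subj-value'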
Example #6
def test_avro_serializer_topic_record_subject_name_strategy_primitive(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject
    name for a primitive schema. Also verifies transformation from Avro
    canonical form.
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client, 'int',
                                     conf={'subject.name.strategy':
                                           topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-int'
Example #7
def test_avro_serializer_topic_record_subject_name_strategy(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject name
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(
        test_client,
        load_avsc('basic_schema.avsc'),
        conf={'subject.name.strategy': topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-python.test.basic'
Example #8
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField


def process_row(serialized_data):
    schema = '''
    {
    "namespace": "org.mddarr.rides.event.dto",
     "type": "record",
     "name": "AvroRideCoordinate",
     "fields": [
         {"name": "dataID", "type": "string"},
         {"name": "value", "type": "double"}
     ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient(
        {"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schemaRegistryClient, schema)
    serializationContext = SerializationContext("time-series", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    return str(deserialized_row['value'])
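A minimal round-trip sketch for the function above, assuming a Schema Registry
is reachable at localhost:8081; the record values are illustrative:

from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import SerializationContext, MessageField

schema = '''
{
"namespace": "org.mddarr.rides.event.dto",
 "type": "record",
 "name": "AvroRideCoordinate",
 "fields": [
     {"name": "dataID", "type": "string"},
     {"name": "value", "type": "double"}
 ]
}
'''
client = SchemaRegistryClient({"url": "http://localhost:8081"})
serializer = AvroSerializer(client, schema)
payload = serializer({"dataID": "ride-1", "value": 3.14},
                     SerializationContext("time-series", MessageField.VALUE))
print(process_row(payload))  # expected: "3.14"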
Example #9
    def send_msg(self, message, partition=0, topic=None):

        # if no topic is provided, the first topic in the list is used as default
        if topic is None:
            out_topic = self.out_topic[0]
        else:
            out_topic = topic

        # encode the data with the specified Avro out_schema
        ctx = SerializationContext(out_topic, MessageField.VALUE)
        ser_message = self.serializer[out_topic](message, ctx)

        try:
            self.producer.produce(topic=out_topic,
                                  value=ser_message,
                                  partition=partition)
        except Exception as e:
            print(f"Error sending data to Kafka: {repr(e)}")
Example #10
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField


def process_row(serialized_data):
    schema = '''
    {
    "namespace": "org.mddarr.rides.event.dto",
     "type": "record",
     "name": "AvroRideCoordinate",
     "fields": [
         {"name": "eventime", "type": "long"},
         {"name": "latitude", "type": "double"},
         {"name": "longitude", "type": "double"}
     ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient({"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schemaRegistryClient, schema)
    serializationContext = SerializationContext("coordinates", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    print("THE DESERIALIZED ROW LOOKS LIKE " + str(deserialized_row))

    return [deserialized_row['latitude'],
            deserialized_row['longitude'],
            float(deserialized_row['eventime'])]
Example #11
def test_avro_serializer_config_auto_register_schemas_false(mock_schema_registry):
    """
    Ensures auto.register.schemas=False does not register the schema
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic = "test-auto-register"
    subject = topic + '-key'

    test_serializer = AvroSerializer(test_client, 'string',
                                     conf={'auto.register.schemas': False})

    test_serializer("test",
                    SerializationContext("test-auto-register",
                                         MessageField.KEY))

    register_count = test_client.counter['POST'].get('/subjects/{}/versions'
                                                     .format(subject), 0)
    assert register_count == 0
    # Ensure lookup_schema was invoked instead
    assert test_client.counter['POST'].get('/subjects/{}'.format(subject)) == 1
Example #12
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField

schemaRegistryClient = SchemaRegistryClient({"url": "http://*****:*****@*****"})
# the schema definition was redacted from the original snippet
avroDeserializer = AvroDeserializer(schemaRegistryClient, schema)
serializationContext = SerializationContext("time-series", MessageField.VALUE)

deserialized_message = avroDeserializer(message, serializationContext)
Example #13
            "type": "string"
        },
        "position": {
            "type": "integer"
        },
    },
}

schema_client = SchemaRegistryClient({"url": s.schema_server})

json_serializer = JSONSerializer(dumps(message_schema),
                                 schema_client,
                                 conf={"auto.register.schemas": False})
string_serializer = StringSerializer()

key_context = SerializationContext(s.registrations_topic, MessageField.KEY)
value_context = SerializationContext(s.registrations_topic, MessageField.VALUE)

con = psycopg2.connect(
    database=s.db_database,
    user=s.db_user,
    password=s.db_password,
    host=s.db_server,
    port=s.db_port,
)


def acked(err, msg):
    """
    Kafka production error callback. Used to raise HTTP 500 when message production fails.
    :param err: Kafka error
Example #14
import time

from confluent_kafka import Producer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField

print("start 1p_multiples")

broker = 'kafka:9093'
topic = 'multiples'
conf = {'bootstrap.servers': broker}

p = Producer(**conf)
s = StringSerializer()
print("created KafkaPC")

ctx = SerializationContext(topic, MessageField.VALUE)
for i in range(10):

    # casts int to string for StringSerializer/StringDeserializer
    message = s(str(i*i), ctx)

    # DeprecationWarning will be resolved in upcoming release
    # https://github.com/confluentinc/confluent-kafka-python/issues/763
    p.produce(topic, message)

    print(f"Sent message {i*i}")
    time.sleep(1)
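A matching consumer for the loop above might look like this; a sketch reusing
the broker and topic defined above, with an illustrative group.id:

from confluent_kafka import Consumer
from confluent_kafka.serialization import StringDeserializer, SerializationContext, MessageField

c = Consumer({'bootstrap.servers': broker,
              'group.id': 'multiples-reader',
              'auto.offset.reset': 'earliest'})
c.subscribe([topic])
d = StringDeserializer()

msg = c.poll(10.0)
if msg is not None and msg.error() is None:
    # decode the UTF-8 string payload produced above
    value = d(msg.value(), SerializationContext(msg.topic(), MessageField.VALUE))
    print(f"Received message {value}")
c.close()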
Example #15
    def export_items(self, items):
        items_grouped_by_type = group_by_item_type(items)

        for item_type, topic in self.item_type_to_topic_mapping.items():
            item_group = items_grouped_by_type.get(item_type)

            if item_group:
                serialization_context = SerializationContext(
                    topic, MessageField.VALUE)
                for item in item_group:
                    headers = []
                    if item["type"] == "block":
                        # Configure header & key
                        key = bytes(str(item["number"]), "utf-8")
                        headers.append(("hash", bytes(item["hash"], "utf-8")))
                        timestamp = int(item['timestamp'])
                        # Create blocks_raw object
                        value_object = blocks_raw.BlockRaw(
                            type=str(item["type"]),
                            number=item["number"],
                            hash=str(item["hash"]),
                            parent_hash=str(item["parent_hash"]),
                            merkle_root_hash=str(item["merkle_root_hash"]),
                            timestamp=item["timestamp"],
                            version=str(item["version"]),
                            transaction_count=item["transaction_count"],
                            peer_id=str(item["peer_id"]),
                            signature=str(item["signature"]),
                            next_leader=str(item["next_leader"]),
                            item_id=str(item["item_id"]),
                            item_timestamp="",
                        )
                    elif item["type"] == "log":
                        # Configure header & key
                        key = bytes(str(item["transaction_hash"]), "utf-8")
                        headers.append(
                            ("hash", bytes(item["transaction_hash"], "utf-8")))
                        timestamp = int(item['block_timestamp'])
                        headers.append(
                            ("address", bytes(item["address"], "utf-8")))
                        # Create logs_raw object
                        value_object = logs_raw.LogRaw(
                            type=str(item["type"]),
                            log_index=item["log_index"],
                            max_log_index=item["max_log_index"],
                            transaction_hash=str(item["transaction_hash"]),
                            transaction_index=item["transaction_index"],
                            address=str(item["address"]),
                            data=dumps(item["data"]),
                            indexed=dumps(item["indexed"]),
                            block_number=item["block_number"],
                            block_timestamp=item["block_timestamp"],
                            block_hash=str(item["block_hash"]),
                            item_id=str(item["item_id"]),
                            item_timestamp="",
                        )
                    else:
                        # Configure header & key
                        headers.append(("hash", bytes(item["hash"], "utf-8")))
                        timestamp = int(item['block_timestamp'])
                        if item["to_address"]:
                            headers.append(
                                ("to", bytes(item["to_address"], "utf-8")))
                            key = bytes(item["hash"], "utf-8")
                        else:
                            headers.append(("to", bytes("None", "utf-8")))
                            key = bytes(item["hash"], "utf-8")
                        if item["from_address"]:
                            headers.append(
                                ("from", bytes(item["from_address"], "utf-8")))
                        else:
                            headers.append(("from", bytes("None", "utf-8")))
                        value_object = transactions_raw.TransactionRaw(
                            type=str(item["type"]),
                            version=str(item["version"]),
                            from_address=str(item["from_address"]),
                            to_address=str(item["to_address"]),
                            value=dec_to_hex(item["value"]),
                            step_limit=item["step_limit"],
                            timestamp=str(item["timestamp"]),
                            block_timestamp=item["block_timestamp"],
                            nid=item["nid"],
                            nonce=dec_to_hex(item["nonce"]),
                            hash=str(item["hash"]),
                            transaction_index=item["transaction_index"],
                            block_hash=str(item["block_hash"]),
                            block_number=item["block_number"],
                            fee=item["fee"],
                            signature=str(item["signature"]),
                            data_type=str(item["data_type"]),
                            data=dumps(item["data"]),
                            receipt_cumulative_step_used=item[
                                "receipt_cumulative_step_used"],
                            receipt_step_used=item["receipt_step_used"],
                            receipt_step_price=item["receipt_step_price"],
                            receipt_score_address=str(
                                item["receipt_score_address"]),
                            receipt_logs=str(item["receipt_logs"]),
                            receipt_status=item["receipt_status"],
                            item_id=str(item["item_id"]),
                            item_timestamp=str(item["item_timestamp"]),
                        )

                    if self.serializers:
                        self.producer.produce(
                            topic=topic,
                            value=self.serializers[item_type](
                                value_object, serialization_context),
                            key=key,
                            headers=headers,
                            timestamp=timestamp,
                        )
                    else:
                        self.producer.produce(
                            topic,
                            value=MessageToJson(value_object),
                            key=key,
                            headers=headers,
                            timestamp=timestamp,
                        )
                    self.producer.poll(0)

                self.producer.flush()
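Note: produce() is asynchronous in confluent-kafka; the poll(0) call above
services delivery callbacks as items are queued, and the final flush() blocks
until every buffered message for the group has been delivered.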