Example #1
def test_avro_serializer_topic_record_subject_name_strategy(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject name
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client,
                                     load_avsc('basic_schema.avsc'),
                                     conf={'subject.name.strategy':
                                           topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-python.test.basic'
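For reference, topic_record_subject_name_strategy simply joins the topic from the SerializationContext with the record's fully-qualified name. A minimal standalone sketch of that behaviour (no registry connection is needed; the record name is a placeholder):

from confluent_kafka.schema_registry import topic_record_subject_name_strategy
from confluent_kafka.serialization import SerializationContext, MessageField

ctx = SerializationContext('test_subj', MessageField.VALUE)
# The strategy concatenates topic and record name with '-':
# 'test_subj' + '-' + 'python.test.basic' -> 'test_subj-python.test.basic'
print(topic_record_subject_name_strategy(ctx, 'python.test.basic'))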
Example #2
def test_avro_serializer_topic_record_subject_name_strategy_primitive(load_avsc):
    """
    Ensures topic_record_subject_name_strategy returns the correct subject name.
    Also verifies transformation from Avro canonical form.
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client, 'int',
                                     conf={'subject.name.strategy':
                                           topic_record_subject_name_strategy})

    ctx = SerializationContext('test_subj', MessageField.VALUE)
    assert test_serializer._subject_name_func(
        ctx, test_serializer._schema_name) == 'test_subj-int'
Example #3
def avro_messages_producer(schema):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema, schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    producer_conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.serializer': string_serializer,
        'value.serializer': avro_serializer
    }

    return SerializingProducer(producer_conf)
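A possible way to call the helper above; the topic, schema string, and callback names below are illustrative, not part of the original snippet:

# Hypothetical usage: schema_str, 'my-topic' and on_delivery_cb are assumed names.
producer = avro_messages_producer(schema_str)
producer.produce(topic='my-topic',
                 key='some-key',
                 value={'field': 'value'},  # must be a dict matching the Avro schema
                 on_delivery=on_delivery_cb)
producer.flush()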
Example #4
def test_delivery_report_serialization(kafka_cluster, load_file, avsc, data,
                                       record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized
        record_type (str): type of the value ('record', 'float', or a primitive)

    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()
    schema_str = load_file(avsc)

    value_serializer = AvroSerializer(sr, schema_str)

    value_deserializer = AvroDeserializer(sr)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        actual = value_deserializer(
            msg.value(), SerializationContext(topic, MessageField.VALUE))

        if record_type == "record":
            assert all([v == actual[k] for k, v in data.items()])
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # the schema may include defaults that need not exist in the original data
    if record_type == 'record':
        assert all([v == actual[k] for k, v in data.items()])
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
Example #5
def test_avro_serializer_schema_loads_union(load_avsc):
    """
    Ensures union types are correctly parsed
    """
    conf = {'url': TEST_URL}
    test_client = SchemaRegistryClient(conf)
    test_serializer = AvroSerializer(test_client,
                                     load_avsc('union_schema.avsc'))

    assert test_serializer._schema_name is None

    schema = test_serializer._parsed_schema
    assert isinstance(schema, list)
    assert schema[0]["name"] == "RecordOne"
    assert schema[1]["name"] == "RecordTwo"
Example #6
def send_record(args):
    """ Sends Record using a SerializingProducer & AvroSerializer """
    topic = args.topic.rstrip()

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_serializer = AvroSerializer(schema_registry_client, DATA_SCHEMA,
                                     data_to_dict)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "key.serializer": StringSerializer('utf_8'),
        "value.serializer": avro_serializer
    }
    producer = SerializingProducer(producer_config)

    split_incoming_data = args.record_value.split(',')
    if len(split_incoming_data) != 7:  # Data Format Check
        print('** Error: Insufficient Incoming Data: ', split_incoming_data)
        raise ValueError('Expected 7 comma-separated values')
    try:  # Data Format Check
        incoming_data = {
            'envId': int(split_incoming_data[0]),
            'whenCollected': str(split_incoming_data[1]),
            'timeLightOnMins': int(split_incoming_data[2]),
            'humidity': int(split_incoming_data[3]),
            'soilMoisture': int(split_incoming_data[4]),
            'temperature': int(split_incoming_data[5]),
            'waterConsumption': int(split_incoming_data[6])
        }
    except Exception as error:
        print('** Error Creating Dict of Data: ', error)
        raise

    print(f'Producing data records to topic {topic}. ^C to exit.')
    producer.poll(1)
    try:
        key = args.record_key if args.record_key else str(uuid4())
        data_object = Data(incoming_data)
        print('\t-Producing Avro record. . .')
        producer.produce(topic=topic,
                         key=key,
                         value=data_object,
                         on_delivery=delivery_report)
    except ValueError:
        print('\t-Invalid input, discarding record. . .')
    print('\nFlushing records. . .')
    producer.flush()
Example #7
    def __init__(self, value_schema, groupID='KafkaAvroProducer'):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
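The class above only shows its constructor; a minimal sketch of a publish method such a wrapper might expose (method name, topic parameter, and the inline callback are assumptions, not from the original):

    def publishEvent(self, key, value, topicName='kafka-avro-producer'):
        # Hypothetical: key is encoded by the StringSerializer and value by the
        # AvroSerializer configured in __init__, then the record is produced.
        self.producer.produce(topic=topicName,
                              key=key,
                              value=value,
                              on_delivery=lambda err, msg: print(err) if err else None)
        self.producer.flush()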
Example #8
def test_avro_serializer_config_auto_register_schemas_false(mock_schema_registry):
    """
    Ensures auto.register.schemas=False does not register schema
    """
    conf = {'url': TEST_URL}
    test_client = mock_schema_registry(conf)
    topic = "test-auto-register"
    subject = topic + '-key'

    test_serializer = AvroSerializer(test_client, 'string',
                                     conf={'auto.register.schemas': False})

    test_serializer("test",
                    SerializationContext("test-auto-register",
                                         MessageField.KEY))

    register_count = test_client.counter['POST'].get('/subjects/{}/versions'
                                                     .format(subject), 0)
    assert register_count == 0
    # Ensure lookup_schema was invoked instead
    assert test_client.counter['POST'].get('/subjects/{}'.format(subject)) == 1
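Outside the mock registry used in this test, the flag is passed the same way; a minimal sketch against a real registry (the URL and schema string are placeholders):

from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer

# Placeholder URL; with auto.register.schemas=False the serializer looks the
# schema up in the registry instead of registering it on first use.
client = SchemaRegistryClient({'url': 'http://localhost:8081'})
serializer = AvroSerializer(client, 'string',
                            conf={'auto.register.schemas': False})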
Example #9
def test_avro_record_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized
        record_type (str): type of the value ('record', 'float', or a primitive)

    Raises:
        AssertionError on test failure

    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    schema_str = load_avsc(avsc)
    value_serializer = AvroSerializer(sr, schema_str)

    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    producer.produce(topic, value=data, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    if record_type == 'record':
        assert all([v == actual[k] for k, v in data.items()])
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
Example #10
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(User.avro_schema(),
                                     schema_registry_client,
                                     user_to_dict)

    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': StringSerializer('utf_8'),
                     'value.serializer': avro_serializer}

    producer = SerializingProducer(producer_conf)

    print(f"Producing user records to topic {topic}. ^C to exit.")
    while True:
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic, key=str(uuid4()), value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #11
value_schema = avro.loads(value_schema_str)



def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))

schema_registry_conf = {'url': 'http://schema-registry:8081'}
schema_registry_client = SchemaRegistryClient(schema_registry_conf)
avro_serializer = AvroSerializer(value_schema_str, schema_registry_client, caseToDict)
producer_conf = {'bootstrap.servers': 'kafka:29092',
                    'key.serializer': StringSerializer('utf_8'),
                    'value.serializer': avro_serializer}

producer = SerializingProducer(producer_conf)
        



# avroProducer = AvroProducer({
#     'bootstrap.servers': 'kafka:29092',
#     'on_delivery': delivery_report,
#     'schema.registry.url': 'http://schema-registry:8081'
#     }, default_key_schema=key_schema, default_value_schema=value_schema)
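A hypothetical produce call for the SerializingProducer configured above; the topic name, key, and case_record object are illustrative, and caseToDict is assumed to turn the object into a dict matching value_schema_str:

# Illustrative only: 'cases' and case_record are assumed names.
producer.poll(0.0)
producer.produce(topic='cases',
                 key='case-1',
                 value=case_record,
                 on_delivery=delivery_report)
producer.flush()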
Example #12
class TestMessages:
    test_messages = [
        'test message 1', 'test message 2', 'test message 3', 'test message 4'
    ]
    topic = 'christian_test'
    conf = kafka_utils.read_config('producer_google_chicago_1.config',
                                   'producer_google_chicago_1')
    schema_registry_conf = {'url': conf['schema.registry.url']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    key_schema_file = portfolio_path + "/kafka" + conf['google.key.schema.file']
    value_schema_file = portfolio_path + "/kafka" + conf[
        'google.value.schema.file']
    key_schema, value_schema = kafka_utils.load_avro_schema_from_file(
        key_schema_file, value_schema_file)
    key_avro_serializer = AvroSerializer(key_schema, schema_registry_client,
                                         google.Key.key_to_dict)
    value_avro_serializer = AvroSerializer(value_schema,
                                           schema_registry_client,
                                           google.Value.value_to_dict)
    key_avro_deserializer = AvroDeserializer(key_schema,
                                             schema_registry_client,
                                             google.Key.dict_to_key)
    value_avro_deserializer = AvroDeserializer(value_schema,
                                               schema_registry_client,
                                               google.Value.dict_to_value)

    def test_producer(self):
        # Read arguments and configurations and initialize
        producer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.serializer': self.key_avro_serializer,
            'value.serializer': self.value_avro_serializer
        }
        producer = SerializingProducer(producer_config)

        delivered_records = 0
        for text in self.test_messages:
            url = 'www.test.com'
            scraper_dt = datetime.now(pytz.timezone('America/Denver'))
            scraper_dt = scraper_dt.strftime("%Y/%m/%d %H:%M:%S %z")
            value_obj = google.Value(text=text, scraper_dt=scraper_dt)
            key_obj = google.Key(url=(url))
            producer.produce(topic=self.topic,
                             key=key_obj,
                             value=value_obj,
                             on_delivery=kafka_utils.acked)
            delivered_records += producer.poll()
        producer.flush()

        assert delivered_records == len(self.test_messages)

    def test_consumer(self):
        consumer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.deserializer': self.key_avro_deserializer,
            'value.deserializer': self.value_avro_deserializer,
            'group.id': '1',
            'auto.offset.reset': 'earliest'
        }
        offset = kafka_utils.offset - len(self.test_messages) + 1
        consumer = DeserializingConsumer(consumer_config)
        partitions = []
        partition = TopicPartition(topic=self.topic,
                                   partition=0,
                                   offset=offset)
        partitions.append(partition)
        consumer.assign(partitions)
        # Process messages
        result = []
        attempt = 0
        while len(result) < len(self.test_messages):
            try:
                msg = consumer.poll(1.0)
                attempt += 1
                if msg is None:
                    print("no message received")
                    if attempt >= 10:
                        break
                elif msg.error():
                    break
                else:
                    value_object = msg.value()
                    text = value_object.text
                    print("adding {} to result".format(text))
                    result.append(text)
            except KeyboardInterrupt:
                break
            except SerializerError:
                break
        # Leave group and commit final offsets
        consumer.close()

        assert result == self.test_messages
Example #13
def main(args):
    topic = args.topic
    schema_str = EventSchema

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_str, schema_registry_client)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    list_type = [{
        "grilleIdent": "Numero 123T",
        "codeRetourServiceMetier": "code 23432543",
        "referer": "1qsd",
        "browserVersion": "qsdqsd",
        "androidUDID": "qsdqsdqsd",
        "iosIDFA": "qdqsdqsd",
        "appVersion": "qsdqsdqsdqsd",
        "idTmx": "qsdqsdqsd"
    }, {
        "numeroCompteBeneficiaire": "Numero 123T",
        "codePaysResidence": "code 23432543",
        "codePaysResidenceIso": "code 23432543",
        "adresseBeneficiaire": "code 23432543",
        "nomCompletBeneficiaire": "code 23432543",
        "idListeBeneficiaire": "code 23432543",
        "idBeneficiaire": "code 23432543",
        "modeValidation": 34,
        "bicBeneficiaire": "code 23432543",
        "idTmx": "code 23432543"
    }]
    while True:
        x = random.choice([0, 1])

        eventHeader = {
            "eventId": str(uuid4()),
            "dateTimeRef": 1589364605654,
            "nomenclatureEv": "Event Header",
            "canal": 1,
            "media": 2,
            "schemaVersion": "v0",
            "headerVersion": "v2",
            "serveur": "s1",
            "acteurDeclencheur": {
                "adresseIP": "127.0.0.1",
                "idTelematique": str(uuid4()),
                "idPersonne": "zahir"
            }
        }
        value = {
            "EventHeader": eventHeader,
            "EventBusinessContext": list_type[x]
        }
        print(value)
        producer.produce(topic=topic,
                         key=str(uuid4()),
                         value=value,
                         on_delivery=delivery_report)
        producer.flush()
        time.sleep(0.1)
Example #14
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_enriched_event_str = EnrichedEventSchema
    schema_dict = ast.literal_eval(schema_enriched_event_str)
    schema_metrics = MetricSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_metrics, schema_registry_client)
    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    avro_deserializer = AvroDeserializer(schema_enriched_event_str,
                                         schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")

    cluster.register_user_type('datascience', 'datafield', Datafield)

    client_influxdb = InfluxDBClient('35.181.155.182', 8086, "dbsaleh2")
    # client_influxdb = InfluxDBClient(url="http://35.181.155.182:8086 , "mydb")

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()

            query = f"""
            insert into eventenrich (
                        "eventId" ,
                        "dateTimeRef",
                        "nomenclatureEv",
                        "canal",
                        "media",
                        "schemaVersion",
                        "headerVersion",
                        "serveur",
                        "adresseIP",
                        "idTelematique",
                        "idPersonne",
                        "dateNaissance",
                        "paysResidence",
                        "paysNaissance",
                        "revenusAnnuel",
                        "csp",
                        "eventBC",
                        "eventContent"
                        )
                        VALUES (%s, %s, %s, %s,%s, %s, %s, %s,%s, %s, %s, %s,%s, %s, %s, %s,%s, %s)
                    """

            #eventBc = evt["EventBusinessContext"][0].replace("com.bnpparibas.dsibddf.event.","")
            eventBc = evt["eventBC"].replace("com.bnpparibas.dsibddf.event.",
                                             "")
            eventContent = evt["EventBusinessContext"][1]

            transformed_event = transform_enriched_event_to_cassandra_model(
                evt, eventBc, schema_dict, eventContent)

            insert_enriched_event_to_cassandra(transformed_event, session,
                                               query)

            elapsed_time = (time.time() - start)

        except Exception as e:
            print(f"Exception => {e}")
            continue

        query = 'SELECT * FROM metrics'
        result = client_influxdb.query(query, database="dbsaleh2")
        print(result)

        data = []

        print(elapsed_time)
        metrics = [{
            "measurement": "metrics",
            "fields": {
                "metricName": "hystorize",
                "timeforhystorize": elapsed_time
            }
        }]
        data.append(metrics)

        # client_influxdb.write_points("hystorize",elapsed_time, database="dbsaleh2")
        client_influxdb.write_points(metrics, database="dbsaleh2")
        producer.produce(topic=outputtopic,
                         value={
                             'metricName': "hystorize",
                             'time': elapsed_time
                         },
                         on_delivery=delivery_report)
        producer.flush()

    consumer.close()
Example #15

def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))

hostname = "mysql_db"
portnum = 3306
uname = "root"
paswd = "password"
dbase = "pythonTest"
tablename = "transactions_produced"

schema_registry_conf = {'url': 'http://schema-registry:8081'}
schema_registry_client = SchemaRegistryClient(schema_registry_conf)
avro_serializer = AvroSerializer(value_schema_str, schema_registry_client,
                                 transactionToDict)
producer_conf = {
    'bootstrap.servers': 'kafka:29092',
    'key.serializer': StringSerializer('utf_8'),
    'value.serializer': avro_serializer
}

producer = SerializingProducer(producer_conf)
while True:
    # Serve on_delivery callbacks from previous calls to produce()
    producer.poll(0.0)
    try:
        countRows = MySQLCount(hostname, portnum, uname, paswd, dbase,
                               tablename)["count(*)"]
        j = 1
        for i in range(1, countRows + 1):
Example #16
    conf = ccloud_lib.read_ccloud_config(config_file)

    # Create topic if needed
    ccloud_lib.create_topic(conf, topic)

    # for full list of configurations, see:
    #  https://docs.confluent.io/platform/current/clients/confluent-kafka-python/#schemaregistryclient
    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['basic.auth.user.info']
    }

    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    name_avro_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client,
        schema_str=ccloud_lib.name_schema,
        to_dict=ccloud_lib.Name.name_to_dict)
    count_avro_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client,
        schema_str=ccloud_lib.count_schema,
        to_dict=ccloud_lib.Count.count_to_dict)

    # for full list of configurations, see:
    #  https://docs.confluent.io/platform/current/clients/confluent-kafka-python/#serializingproducer
    producer_conf = ccloud_lib.pop_schema_registry_params_from_config(conf)
    producer_conf['key.serializer'] = name_avro_serializer
    producer_conf['value.serializer'] = count_avro_serializer
    producer = SerializingProducer(producer_conf)

    delivered_records = 0
Example #17
def main(
    name: str,
    shutdown: multiprocessing.Value,
    request_queue: multiprocessing.Queue,
    config: Config
) -> None:
    """Execute tasks forever.

    This method is the entrypoint for the worker which executes the monitoring
    tasks. It is executed in a dedicate child process.
    """
    if config.verbose:
        logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(name)
    log.info(f"Starting process {name}.")

    # SIGINT will be delivered to the whole process group. We'll need to ignore
    # it in the worker processes to give them the opportunity to finish any
    # pending work.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    schema_registry_client = SchemaRegistryClient({
        'url': config.schema_registry
    })
    avro_serializer = AvroSerializer(
        Report.SCHEMA,
        schema_registry_client,
        Report.asdict
    )

    producer = SerializingProducer({
        'client.id': name,
        'bootstrap.servers': config.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'security.protocol': 'SSL',
        'ssl.key.location': config.auth_key,
        'ssl.certificate.location': config.auth_cert,
        'ssl.ca.location': config.ca_cert,
        'value.serializer': avro_serializer,
    })
    err = _report_error(log)

    while not shutdown.value:
        producer.poll(0.0)
        try:
            now = datetime.now()
            req = request_queue.get(timeout=1)
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, req)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.WRITEFUNCTION, len)
            try:
                curl.perform()
                report = Report(
                    timestamp=now.timestamp(),
                    url=req,
                    code=int(curl.getinfo(pycurl.RESPONSE_CODE)),
                    namelookup=curl.getinfo(pycurl.NAMELOOKUP_TIME),
                    connect=curl.getinfo(pycurl.CONNECT_TIME),
                    appconnect=curl.getinfo(pycurl.APPCONNECT_TIME),
                    pretransfer=curl.getinfo(pycurl.PRETRANSFER_TIME),
                    starttransfer=curl.getinfo(pycurl.STARTTRANSFER_TIME),
                    total=curl.getinfo(pycurl.TOTAL_TIME),
                )
                log.info(str(report))
                producer.produce(
                    topic=config.topic,
                    key=req,
                    value=report,
                    on_delivery=err
                )
            except TypeError:
                # It'll never work if we misconfigure PycURL.
                raise
            except pycurl.error as exc:
                # TODO: Record the failure in Kafka.
                log.warning(f"Failed to retrieve {req}", exc)
            # TODO: Handle exceptions from the Kafka Producer.
            finally:
                curl.close()
        except queue.Empty:
            log.debug("No request to process.")
    # Flush any results that haven't been committed yet.
    log.warning(f"Process {name} shutting down.")
    producer.flush()
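The Report type referenced above (Report.SCHEMA, Report.asdict, and the keyword constructor) is not part of the snippet; a plausible sketch based on the fields used, with the Avro schema string and field types being assumptions:

from dataclasses import dataclass, asdict

@dataclass
class Report:
    """Result of probing one URL with PycURL; field names mirror the usage above."""
    timestamp: float
    url: str
    code: int
    namelookup: float
    connect: float
    appconnect: float
    pretransfer: float
    starttransfer: float
    total: float

    # Assumed Avro schema matching the fields above.
    SCHEMA = """
    {
      "type": "record",
      "name": "Report",
      "fields": [
        {"name": "timestamp", "type": "double"},
        {"name": "url", "type": "string"},
        {"name": "code", "type": "int"},
        {"name": "namelookup", "type": "double"},
        {"name": "connect", "type": "double"},
        {"name": "appconnect", "type": "double"},
        {"name": "pretransfer", "type": "double"},
        {"name": "starttransfer", "type": "double"},
        {"name": "total", "type": "double"}
      ]
    }
    """

    @staticmethod
    def asdict(report, ctx):
        # AvroSerializer's to_dict callable receives (obj, SerializationContext).
        return asdict(report)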
Example #18
def send_record(args):
    if not any([args.record_value, args.record_file]):
        raise AttributeError(
            "--record-value or --record-file are not provided.")

    if args.schema_file is None:
        raise AttributeError("--schema-file is not provided.")

    if args.security_protocol and args.security_protocol.lower() not in [
            'plaintext', 'ssl'
    ]:
        raise AttributeError(
            "--security-protocol must be either plaintext or ssl.")

    schema_registry_client = SchemaRegistryClient(
        {'url': args.schema_registry})

    with open(args.schema_file, 'r') as file:
        schema = file.read()

    string_serializer = StringSerializer('utf-8')
    avro_serializer = AvroSerializer(schema, schema_registry_client)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        'key.serializer': string_serializer,
        'value.serializer': avro_serializer,
    }

    security_protocol = (args.security_protocol or "plaintext").lower()

    if security_protocol == "ssl" and all(
        [args.ssl_ca_location, args.ssl_cert_location, args.ssl_key_location]):
        producer_config.update({
            'security.protocol':
            security_protocol,
            'ssl.ca.location':
            args.ssl_ca_location,
            'ssl.key.location':
            args.ssl_key_location,
            'ssl.certificate.location':
            args.ssl_cert_location
        })
    else:
        raise AttributeError(
            "--security-protocol is ssl, please supply certificates.")

    producer = SerializingProducer(producer_config)

    key = args.record_key if args.record_key else str(uuid.uuid4())

    if args.record_file:
        with open(args.record_file, 'r') as f:
            data = f.readlines()
        for line in data:
            try:
                producer.produce(topic=args.topic,
                                 key=key,
                                 value=json.loads(line))
            except Exception as e:
                print(
                    f"Exception while producing record value - {line} to topic - {args.topic}: {e}"
                )
            else:
                print(
                    f"Successfully producing record value - {line} to topic - {args.topic}"
                )
    else:
        value = args.record_value

        try:
            producer.produce(topic=args.topic, key=key, value=value)
        except Exception as e:
            print(
                f"Exception while producing record value - {value} to topic - {args.topic}: {e}"
            )
        else:
            print(
                f"Successfully producing record value - {value} to topic - {args.topic}"
            )

    producer.flush()
Example #19
    def value_serializer(self):
        return AvroSerializer(
            schema_str=self._record.value_schema_string,
            schema_registry_client=self._schema_registry_client,
        )
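A companion sketch showing how such a method might be paired with a key serializer in a producer; the attribute names and config keys below are assumptions, not from the original snippet:

    def producer(self):
        # Hypothetical wiring: pair the value serializer above with a plain
        # string key serializer inside a SerializingProducer.
        return SerializingProducer({
            'bootstrap.servers': self._bootstrap_servers,  # assumed attribute
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': self.value_serializer(),
        })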
Example #20
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    loop = asyncio.get_event_loop()

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            print("msg=>", evt)

            def enrich(evt):
                print("evt", evt)
                if evt is not None:
                    print("récupérer dans kafka")
                    row = session.execute(
                        GET_ENRICHED_DATA_QUERY,
                        (evt["EventHeader"]["acteurDeclencheur"]["idPersonne"],
                         )).one()

                    if row:
                        evt['EnrichedData'] = row
                        # evt['EventBusinessContext'] = evt["EventBusinessContext"][1]
                        EnrichedEvent = {
                            "eventId":
                            evt["EventHeader"]["eventId"],
                            "dateTimeRef":
                            evt["EventHeader"]["dateTimeRef"],
                            "nomenclatureEv":
                            evt["EventHeader"]["nomenclatureEv"],
                            "canal":
                            evt["EventHeader"]["canal"],
                            "media":
                            evt["EventHeader"]["media"],
                            "schemaVersion":
                            evt["EventHeader"]["schemaVersion"],
                            "headerVersion":
                            evt["EventHeader"]["headerVersion"],
                            "serveur":
                            evt["EventHeader"]["serveur"],
                            "adresseIP":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["adresseIP"],
                            "idTelematique":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["idTelematique"],
                            "idPersonne":
                            evt["EventHeader"]["acteurDeclencheur"]
                            ["idPersonne"],
                            "dateNaissance":
                            row["dateNaissance"],
                            "paysResidence":
                            row["paysResidence"],
                            "paysNaissance":
                            row["paysNaissance"],
                            "revenusAnnuel":
                            row["revenusAnnuel"],
                            "csp":
                            row["csp"],
                            "EventBusinessContext":
                            evt["EventBusinessContext"]
                        }

                        producer.produce(topic=outputtopic,
                                         key=str(uuid4()),
                                         value=EnrichedEvent,
                                         on_delivery=delivery_report)
                        producer.flush()

            async_enrich = async_wrap(enrich)
            loop.run_until_complete(async_enrich(evt))

        except Exception as e:
            print(f'Exception => {e}')
            continue

    consumer.close()
Example #21
    # Create topic if needed
    print("connecting to server", conf['bootstrap.servers'])
    kafka_utils.create_topic(conf=conf,
                             topic=topic,
                             num_partitions=1,
                             replication_factor=1)

    schema_registry_conf = {'url': conf['schema.registry.url']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    key_schema_path = this_path + conf['google.key.schema.file']
    value_schema_path = this_path + conf['google.value.schema.file']
    key_schema, value_schema = kafka_utils.load_avro_schema_from_file(
        key_schema_path, value_schema_path)

    key_avro_serializer = AvroSerializer(key_schema, schema_registry_client,
                                         google.Key.key_to_dict)
    value_avro_serializer = AvroSerializer(value_schema,
                                           schema_registry_client,
                                           google.Value.value_to_dict)

    producer_config = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'key.serializer': key_avro_serializer,
        'value.serializer': value_avro_serializer
    }
    # 'stats_cb': print_stats,
    # 'statistics.interval.ms': 1000}

    producer = SerializingProducer(producer_config)

    #wait until the kafka topic and schema registry are up before proceeding
Example #22
from confluent_kafka import SerializingProducer
from confluent_kafka.schema_registry import topic_record_subject_name_strategy
from confluent_kafka.schema_registry.avro import AvroSerializer

from avro_schemas.key_schema import key_schema_str
from avro_schemas.value_schema import value_schema_str
from constants import Topics, BOOTSTRAP_SERVERS, SCHEMA_REGISTRY_CLIENT
from utils import get_partition, on_delivery

avro_serializer_config = {
    "auto.register.schemas": True,
    "subject.name.strategy": topic_record_subject_name_strategy,
}
key_serializer = AvroSerializer(
    schema_str=key_schema_str,
    schema_registry_client=SCHEMA_REGISTRY_CLIENT,
    conf=avro_serializer_config,
)
value_serializer = AvroSerializer(
    schema_str=value_schema_str,
    schema_registry_client=SCHEMA_REGISTRY_CLIENT,
    conf=avro_serializer_config,
)
producer_config = {
    "key.serializer": key_serializer,
    "value.serializer": value_serializer,
    # 'transaction.timeout.ms': 60000,
    # 'enable.idempotence': True,
    # 'debug': "all"
    "bootstrap.servers": BOOTSTRAP_SERVERS
}
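A hypothetical continuation that builds the producer from this config and sends one record; the topic value, key/value payloads, and the get_partition call are illustrative assumptions:

# Illustrative continuation; payloads must match key_schema_str / value_schema_str.
producer = SerializingProducer(producer_config)
producer.produce(
    topic="example-topic",          # a Topics enum member would normally go here
    key={"id": "42"},
    value={"payload": "hello"},
    partition=get_partition("42"),  # assumed helper signature
    on_delivery=on_delivery,
)
producer.flush()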
Example #23
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_enriched_event_str = EnrichedEventSchema
    schema_metrics = MetricSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_metrics, schema_registry_client)
    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    avro_deserializer = AvroDeserializer(schema_enriched_event_str,
                                         schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    client_influxdb = InfluxDBClient('35.181.155.182', 8086, "dbsaleh2")

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()

            idPersonne = evt["idPersonne"]

            rows = session.execute(GET_ENRICHED_EVENT_QUERY, (idPersonne, ))
            if rows:
                # print(idPersonne, f"rows={rows.all().__len__()}")
                # stat_process(idPersonne, rows)
                # som = rec_process(rows,0,0)
                # print("some", som)

                # row["csp"] = get_value_column_enriched_data(row, "csp")
                # row["paysNaissance"] = get_value_column_enriched_data(row, "paysNaissance")
                #
                #
                # #get_value_column_event_content
                # row['appVersion'] = get_value_column_event_content(row, "appVersion")
                # row['montant'] = get_value_column_event_content(row, "montant")
                # row['androidID'] = get_value_column_event_content(row, "androidID")

                # del rows[0]['eventContent']

                elapsed_time = time.time() - start

                #producer.produce(topic=outputtopic, key=str(uuid4()), value={'metricName':"hystorize",'time':elapsed_time}, on_delivery=delivery_report)
                #producer.flush()

        except Exception as e:
            print(f'Exception => {e}')
            continue

        metrics = [{
            "measurement": "metrics",
            "fields": {
                "metricName": "score",
                "timeforscore": elapsed_time
            }
        }]
        print(elapsed_time)

        client_influxdb.write_points(metrics, database="dbsaleh2")
        producer.produce(topic=outputtopic,
                         value={
                             'metricName': "score",
                             'time': elapsed_time
                         },
                         on_delivery=delivery_report)
        producer.flush()

    consumer.close()