Example #1
def main(args):
    topic = args.topic
    delimiter = args.delimiter

    producer_conf = producer_config(args)

    producer = SerializingProducer(producer_conf)

    print('Producing records to topic {}. ^C to exit.'.format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            msg_data = input(">")
            msg = msg_data.split(delimiter)
            if len(msg) == 2:
                producer.produce(topic=topic, key=msg[0], value=msg[1],
                                 on_delivery=delivery_report)
            else:
                producer.produce(topic=topic, value=msg[0],
                                 on_delivery=delivery_report)
        except KeyboardInterrupt:
            break

    print('\nFlushing {} records...'.format(len(producer)))
    producer.flush()
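Example #1 wires an `on_delivery=delivery_report` callback and builds its configuration with a `producer_config(args)` helper, neither of which is shown. A minimal sketch of what such a delivery callback typically looks like, assuming the standard confluent-kafka `(err, msg)` callback signature (the helper itself is hypothetical, not part of a library):

def delivery_report(err, msg):
    """Hypothetical delivery callback, served from poll() or flush()."""
    if err is not None:
        print("Delivery failed for record {}: {}".format(msg.key(), err))
    else:
        print("Record delivered to {} [{}] at offset {}".format(
            msg.topic(), msg.partition(), msg.offset()))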
Example #2

    def _commit(self) -> None:
        """
        Retrieves the current offset by calling :meth:`pyconnect.pyconnectsource.PyConnectSource.get_index` and
        publishes it to the offset topic that is defined in this source's :class:`pyconnect.config.SourceConfig`
        instance.
        """
        idx = self.get_index()
        idx_schema = to_value_schema(idx)
        avro_value_serializer = AvroSerializer(
            schema_registry_client=self.schema_registry_client,
            schema_str=idx_schema)

        producer_config = {
            "bootstrap.servers": self.config["bootstrap.servers"],
            "key.serializer": None,
            "value.serializer": avro_value_serializer,
            **self.config["kafka_opts"],
            **self.config["kafka_producer_opts"],
        }

        offset_producer = SerializingProducer(producer_config)
        offset_producer.produce(key=None,
                                value=idx,
                                topic=self.config["offset_topic"])
        offset_producer.flush()
Example #3

def write_to_kafka(bootstrap_servers, schema_registry_url, topic_name, data):

    print("Kafka Version                : ", confluent_kafka.version(),confluent_kafka.libversion())

    schema_registry_conf = {'url': schema_registry_url}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    value_avro_serializer = AvroSerializer(schemas.weather_source_schema, schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    conf = {'bootstrap.servers': bootstrap_servers,
            'client.id': socket.gethostname(),
            'on_delivery': delivery_report,
            'key.serializer': string_serializer,
            'value.serializer': value_avro_serializer    
            }

    avroProducer = SerializingProducer(conf)
    
    key = str(datetime.date.today()) + '~' + str(data['lat']) + '~' + str(data['lon'])
    message = json.dumps(data, cls=DatetimeEncoder)

    print("Key Type                     : ", type(key))
    print("Value Type                   : ", type(json.loads(message)))
  
    avroProducer.produce(topic=topic_name, key=key, value=json.loads(message))
    avroProducer.flush()
Example #4

def main(args):
    topic = args.topic
    delimiter = args.delimiter
    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': StringSerializer('utf_8'),
                     'value.serializer': StringSerializer('utf_8')}

    producer_conf.update(sasl_conf(args))

    producer = SerializingProducer(producer_conf)

    print("Producing records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            msg_data = input(">")
            msg = msg_data.split(delimiter)
            if len(msg) == 2:
                producer.produce(topic=topic, key=msg[0], value=msg[1],
                                 on_delivery=delivery_report)
            else:
                producer.produce(topic=topic, value=msg[0],
                                 on_delivery=delivery_report)
        except KeyboardInterrupt:
            break

    print("\nFlushing {} records...".format(len(producer)))
    producer.flush()
Example #5

def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_registry_client, schema_str,
                                     user_to_dict)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_address = input("Enter address: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        address=user_address,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
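The `User` class and the `user_to_dict` helper passed to AvroSerializer above are not shown. AvroSerializer calls its `to_dict` argument with the object and a SerializationContext and expects a dict matching the registered schema, so a minimal sketch under that assumption could be (note that `address` is prompted for but is not part of the Avro schema, so it is dropped here):

class User(object):
    def __init__(self, name, address, favorite_number, favorite_color):
        self.name = name
        self.address = address  # collected above, but not in the Avro schema
        self.favorite_number = favorite_number
        self.favorite_color = favorite_color


def user_to_dict(user, ctx):
    # Return a dict whose keys match the "User" record schema registered above.
    return dict(name=user.name,
                favorite_number=user.favorite_number,
                favorite_color=user.favorite_color)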
Example #6

class ProtoKafkaProducer:
    def __init__(self, config_env):
        self.config = config_env
        self.topic_name = self.config["kafka_produce_topic"]

        conf = {
            'bootstrap.servers':
            self.config["bootstrap_servers"],
            'message.max.bytes':
            self.config["kafkaMaxMessageBytes"],
            'queue.buffering.max.ms':
            self.config["queue.buffering.max.ms"],
            'queue.buffering.max.messages':
            self.config["queue.buffering.max.messages"],
            'key.serializer':
            StringSerializer('utf_8'),
            'value.serializer':
            self.__protobuf_serializer()
        }

        self.producer = SerializingProducer(conf)

    def on_delivery(self, err, msg):
        if err:
            print("Message failed delivery, error: %s" % err)
        else:
            print("Message delivered to %s on partition %s" %
                  (msg.topic(), msg.partition()))

    def __protobuf_serializer(self):
        schema_registry_conf = {'url': self.config['schemaregistry.url']}
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        _proto_conf = {
            'auto.register.schemas': self.config['auto.register.schemas'],
        }

        return ProtobufSerializer(self.config['proto_msg_type'],
                                  schema_registry_client,
                                  conf=_proto_conf)

    def produce(self, kafka_msg, kafka_key):
        try:
            self.producer.produce(topic=self.topic_name,
                                  value=kafka_msg,
                                  key=kafka_key,
                                  on_delivery=self.on_delivery)

            self.producer.flush()

        except Exception as e:
            print("Error during producing to kafka topic. Stacktrace is %s", e)
Example #7

class NewsScheduler(object):
    def __init__(self,
                 bootstrap_servers,
                 rss_feeds,
                 topic='crawl-queue',
                 time_checkpoint_fn_base='scheduler_checkpoint'):
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic

        self.feeds = rss_feeds

        self.time_checkpoints = dict()
        for spider_name in self.feeds.values():
            fn = f'{time_checkpoint_fn_base}_{spider_name}.txt'
            fn = fn.replace('/', '_')  # we don't want / in our pathnames.
            self.time_checkpoints[spider_name] = TimeCheckpoint(fn=fn)

        producer_conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': _json_serializer_wrapper
        }
        self.producer = SerializingProducer(producer_conf)

    def process_feed(self, feed_url, spider_name, flush=False):
        log.info(f"Processing feed '{feed_url}' via topic '{self.topic}'.")
        rss_feed = feedparser.parse(feed_url)

        for item in rss_feed.entries:
            item['spider'] = spider_name
            item_updated_time = struct_time_to_datetime(item.updated_parsed)

            if item_updated_time > self.time_checkpoints[
                    spider_name].checkpoint:
                log.info(f"New item: {item['title']}")

                self.producer.produce(topic=self.topic,
                                      key=str(uuid4()),
                                      value=dict(item))

        self.time_checkpoints[
            spider_name].checkpoint = struct_time_to_datetime(
                rss_feed.feed.updated_parsed)

        if flush:
            self.producer.flush()

    def run_loop(self, interval):
        for feed, spider in itertools.cycle(self.feeds.items()):
            self.process_feed(feed, spider)
            time.sleep(interval)
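The `_json_serializer_wrapper` configured as `value.serializer` above is not shown. A SerializingProducer value serializer is simply a callable that takes the object and a SerializationContext and returns bytes, so a minimal sketch (assuming plain JSON encoding of the `dict(item)` payload) could look like this:

import json

def _json_serializer_wrapper(obj, ctx):
    # Encode the feed item dict as UTF-8 JSON bytes; ctx carries topic/field information.
    # default=str guards against values (e.g. parsed timestamps) that json cannot encode natively.
    return json.dumps(obj, default=str).encode('utf-8')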
Example #8

class KafkaAvroProducer:
    def __init__(self,
                 producer_name,
                 value_schema,
                 groupID='KafkaAvroProducer'):

        # Consumer name for logging purposes
        self.logging_prefix = '[' + producer_name + '][KafkaAvroProducer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = EventBackboneConfig.getProducerConfiguration(
            groupID, self.key_serializer, self.value_serializer)
        EventBackboneConfig.printProducerConfiguration(
            self.logging_prefix, self.producer_conf,
            self.schema_registry_conf['url'])
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """
        if err is not None:
            print(
                '[KafkaAvroProducer] - [ERROR] - Message delivery failed: {}'.
                format(err))
        else:
            print('[KafkaAvroProducer] - Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def publishEvent(self, key, value, topicName='kafka-avro-producer'):
        # Produce the Avro message
        self.producer.produce(topic=topicName,
                              value=value,
                              key=key,
                              on_delivery=self.delivery_report)
        # Flush
        self.producer.flush()
Example #9
def send_record(args):
    """ Sends Record using a SerializingProducer & AvroSerializer """
    topic = args.topic.rstrip()

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_serializer = AvroSerializer(schema_registry_client, DATA_SCHEMA,
                                     data_to_dict)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "key.serializer": StringSerializer('utf_8'),
        "value.serializer": avro_serializer
    }
    producer = SerializingProducer(producer_config)

    split_incoming_data = args.record_value.split(',')
    if len(split_incoming_data) != 7:  # Data Format Check
        print('** Error: Expected 7 comma-separated values, got: ', split_incoming_data)
        raise Exception('Invalid record_value format')
    try:  # Data Format Check
        incoming_data = {
            'envId': int(split_incoming_data[0]),
            'whenCollected': str(split_incoming_data[1]),
            'timeLightOnMins': int(split_incoming_data[2]),
            'humidity': int(split_incoming_data[3]),
            'soilMoisture': int(split_incoming_data[4]),
            'temperature': int(split_incoming_data[5]),
            'waterConsumption': int(split_incoming_data[6])
        }
    except Exception as error:
        print('** Error Creating Dict of Data: ', error)
        raise  # re-raise; otherwise incoming_data would be undefined below

    print(f'Producing data records to topic {topic}. ^C to exit.')
    producer.poll(1)
    try:
        key = args.record_key if args.record_key else str(uuid4())
        data_object = Data(incoming_data)
        print('\t-Producing Avro record. . .')
        producer.produce(topic=topic,
                         key=key,
                         value=data_object,
                         on_delivery=delivery_report)
    except ValueError:
        print('\t-Invalid input, discarding record. . .')
    print('\nFlushing records. . .')
    producer.flush()
Example #10
def plain_avro_producer(running_cluster_config: Dict[str, str],
                        topic_and_partitions: Tuple[str, int],
                        records) -> SerializingProducer:
    """
    Creates a `confluent_kafka.SerializingProducer` configured with Avro key and value
    serializers that can be used to publish messages.
    """
    topic_id, _ = topic_and_partitions

    key, value = records[0]

    schema_registry_client = SchemaRegistryClient(
        {"url": running_cluster_config["schema-registry"]})
    key_schema = to_key_schema(key)
    avro_key_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client, schema_str=key_schema)
    value_schema = to_value_schema(value)
    avro_value_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client, schema_str=value_schema)

    producer_config = {
        "bootstrap.servers": running_cluster_config["broker"],
        "key.serializer": avro_key_serializer,
        "value.serializer": avro_value_serializer,
    }

    producer = SerializingProducer(producer_config)

    producer.produce = partial(producer.produce, topic=topic_id)

    return producer
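Because `produce` is pre-bound to the topic with `functools.partial`, callers only have to pass the key and value. A usage sketch, assuming the function is invoked directly with arguments shaped like the ones above (names taken from the snippet, not from a library):

producer = plain_avro_producer(running_cluster_config, topic_and_partitions, records)
for key, value in records:
    producer.produce(key=key, value=value)
producer.flush()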
Example #11
class DatahubKafkaEmitter:
    def __init__(self, config: KafkaEmitterConfig):
        self.config = config

        schema_registry_conf = {
            "url": self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(
            mce: MetadataChangeEvent, ctx: SerializationContext
        ) -> dict:
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        avro_serializer = AvroSerializer(
            schema_str=SCHEMA_JSON_STR,
            schema_registry_client=schema_registry_client,
            to_dict=convert_mce_to_dict,
        )

        producer_config = {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": avro_serializer,
            **self.config.connection.producer_config,
        }

        self.producer = SerializingProducer(producer_config)

    def emit_mce_async(
        self,
        mce: MetadataChangeEvent,
        callback: Callable[[Exception, str], None],
    ) -> None:
        # Call poll to trigger any callbacks on success / failure of previous writes
        self.producer.poll(0)
        self.producer.produce(
            topic=self.config.topic,
            key=mce.proposedSnapshot.urn,
            value=mce,
            on_delivery=callback,
        )

    def flush(self) -> None:
        self.producer.flush()
Example #12

def main(args):
    topic = args.topic

    key_schema_str = open('schema/KeySchema.avsc', "r").read()
    value_schema_str = open('schema/ValueSchema.avsc', "r").read()
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_key_serializer = AvroSerializer(key_schema_str, schema_registry_client, user_quote_key_to_dict)
    avro_value_serializer = AvroSerializer(value_schema_str, schema_registry_client, user_quote_value_to_dict)

    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': avro_key_serializer,
                     'value.serializer': avro_value_serializer}

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_id = input("Enter User ID: ")
            product_id = input("Enter Product ID: ")
            quoted_price = input("Enter price: ")
            quoted_quantity = int(input("Enter the desired quantity: "))
            user_note = input("Enter additional note: ")

            user_quote_key = UserQuoteKey(user_id=int(user_id))

            user_quote_value = UserQuoteValue(product_id=int(product_id),
                                              quoted_price=int(quoted_price),
                                              quoted_quantity=quoted_quantity,
                                              user_note=user_note)

            producer.produce(topic=topic, key=user_quote_key, value=user_quote_value,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #13
class VideoProducer:
    def __init__(self,
                 topic='test',
                 client_id='producer1',
                 bootstrap_servers='localhost:9092',
                 video_reader=None):
        self.topic = topic
        self.video_reader = video_reader
        self.kafka_producer = SerializingProducer({
            'bootstrap.servers':
            bootstrap_servers,
            'value.serializer':
            self.video_reader.serialize,
            'queue.buffering.max.messages':
            500000
        })
        self.delivered_records = 0
        self.start_time = 0

    def acked(self, err, msg):
        """Delivery report handler called on
        successful or failed delivery of message
        """
        if err is not None:
            print("Failed to deliver message: {}".format(err))
        else:
            self.delivered_records += 1
        # print(sys.getsizeof(message))

    def produce(self):
        start_time = time.time()
        while (time.time() - start_time < 60 and self.video_reader.online):
            self.kafka_producer.poll(0.0)
            frame = self.video_reader.read()
            if frame is not None:
                self.kafka_producer.produce(topic=self.topic,
                                            value=frame,
                                            on_delivery=self.acked)
        print("\nFlushing records...")
        self.kafka_producer.flush()
        finished_time = time.time()
        print("MPS: {}".format(self.delivered_records /
                               (finished_time - start_time)))
        self.video_reader.release()
Example #14
class KafkaProducer:
    def __init__(self, topic, producer_config):
        self._topic = topic
        self._producer = SerializingProducer(producer_config.dict)

    def produce(self, record):
        while True:
            try:
                self._producer.produce(topic=self._topic,
                                       key=record.key_to_avro_dict(),
                                       value=record.value_to_avro_dict(),
                                       on_delivery=self._delivery_report)
                self._producer.poll(0)
                break
            except BufferError as e:
                print(f'Failed to send record {record}. '
                      f'Error received: {str(e)}')
                self._producer.poll(1)

    def flush(self):
        if self._producer:
            self._producer.flush()

    @staticmethod
    def _delivery_report(err: KafkaError, msg: Message):
        """ Reports the failure or success of a message delivery.

        Note:
            In the delivery report callback the Message.key()
            and Message.value() will be the binary format as
            encoded by any configured Serializers and
            not the same object that was passed to produce().
            If you wish to pass the original object(s)
            for key and value to delivery
            report callback we recommend a bound callback
            or lambda where you pass the objects along.

        Args:
            err ([KafkaError]): The error that occurred, or None on success.
            msg ([Message]): The message that was produced or failed.
        """

        if err is not None:
            print(f"Delivery failed for record {msg.key()}: {err}")
Example #15

def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    protobuf_serializer = ProtobufSerializer(user_pb2.User,
                                             schema_registry_client,
                                             {'use.deprecated.format': True})

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': protobuf_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = user_pb2.User(name=user_name,
                                 favorite_color=user_favorite_color,
                                 favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             partition=0,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except (KeyboardInterrupt, EOFError):
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #16
def produce(goal):
    count = 0
    reusableProducer = SerializingProducer(getConfigs())
    while (count < goal):
        try:
            reusableProducer.produce(topic='myprototopic',
                                     key=str(uuid4()),
                                     value=generateRecord(),
                                     on_delivery=getReport)
            # print("In process:{}".format(multiprocessing.current_process().name))
            reusableProducer.poll(0.0)
        except KeyboardInterrupt:
            break
        except BufferError:
            sys.stderr.write(
                '%% Local producer queue is full (%d messages awaiting delivery): flushing...\n'
                % len(reusableProducer))
            reusableProducer.flush()

    print("Flushing one producer thread")
    reusableProducer.flush()
Example #17
class Producer:
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 value_serializer=None,
                 config=None):

        producer_config = {
            "bootstrap.servers": bootstrap_servers,
            "value.serializer": value_serializer
        }
        if config:
            producer_config.update(config)

        self.producer = SerializingProducer(producer_config)
        self.topic = topic

    def send(self, key=None, value=None, on_delivery=default_callback):
        self.producer.produce(self.topic,
                              key=key,
                              value=value,
                              on_delivery=on_delivery)
        self.producer.flush()
Example #18
    def produce(self, count: int):
        def increment(err, msg):
            assert err is None
            assert msg is not None
            assert msg.offset() == self.acked
            self.logger.debug("Acked offset %d", msg.offset())
            self.acked += 1

        producer = SerializingProducer({
            'bootstrap.servers':
            self.brokers,
            'key.serializer':
            StringSerializer('utf_8'),
            'value.serializer':
            self._make_serializer()
        })

        self.logger.info("Producing %d %s records to topic %s", count,
                         self.schema_type.name, self.topic)
        for i in range(count):
            # Prevent overflow of buffer
            while len(producer) > 50000:
                # Serve on_delivery callbacks from previous calls to produce()
                producer.poll(0.1)

            producer.produce(topic=self.topic,
                             key=str(uuid4()),
                             value=self._make_payload(i),
                             on_delivery=increment)
            self.produced += 1

        self.logger.info("Flushing records...")
        producer.flush()
        self.logger.info("Records flushed: %d", self.produced)
        while self.acked < count:
            producer.poll(0.01)
        self.logger.info("Records acked: %d", self.acked)
Example #19
    def test_producer(self):
        # Read arguments and configurations and initialize
        producer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.serializer': self.key_avro_serializer,
            'value.serializer': self.value_avro_serializer
        }
        producer = SerializingProducer(producer_config)

        delivered_records = 0
        for text in self.test_messages:
            url = 'www.test.com'
            scraper_dt = datetime.now(pytz.timezone('America/Denver'))
            scraper_dt = scraper_dt.strftime("%Y/%m/%d %H:%M:%S %z")
            value_obj = google.Value(text=text, scraper_dt=scraper_dt)
            key_obj = google.Key(url=(url))
            producer.produce(topic=self.topic,
                             key=key_obj,
                             value=value_obj,
                             on_delivery=kafka_utils.acked)
            delivered_records += producer.poll()
        producer.flush()

        assert delivered_records == len(self.test_messages)
Example #20
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(User.avro_schema(),
                                     schema_registry_client,
                                     user_to_dict)

    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': StringSerializer('utf_8'),
                     'value.serializer': avro_serializer}

    producer = SerializingProducer(producer_conf)

    print(f"Producing user records to topic {topic}. ^C to exit.")
    while True:
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic, key=str(uuid4()), value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #21
from confluent_kafka import SerializingProducer
from confluent_kafka.serialization import IntegerSerializer, StringSerializer


def callback(err, msg):
    if err is not None:
        print(f'Message delivery failed: {err}')
    else:
        print(f'Message delivered to {msg.topic()} [{msg.partition()}]')


p = SerializingProducer({
    'bootstrap.servers': 'localhost:9092',
    'key.serializer': IntegerSerializer(),
    'value.serializer': StringSerializer()
})


for i in range(100):
    polling_result = p.poll(0)
    if polling_result:
        print(f'Polling result: {polling_result}')
    p.produce('sample-topic', key=i, value=f'hello world {i}', on_delivery=callback)

p.flush()

Example #22

class KafkaAvroProducer:
    def __init__(self, value_schema, groupID='KafkaAvroProducer'):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)

    def getSchemaRegistryConf(self):
        try:
            # For IBM Event Streams on IBM Cloud and on OpenShift, the Schema Registry URL is some sort of
            # https://KAFKA_USER:KAFKA_PASSWORD@SCHEMA_REGISTRY_URL
            # Make sure the SCHEMA_REGISTRY_URL your provide is in the form described above.
            url = os.environ['SCHEMA_REGISTRY_URL']
            # If we are talking to ES on prem, it uses an SSL self-signed certificate.
            # Therefore, we need the CA public certificate for the SSL connection to happen.
            if (os.path.isfile(os.getenv('KAFKA_CERT', '/certs/es-cert.pem'))):
                ssl = os.getenv('KAFKA_CERT', '/certs/es-cert.pem')
                return {'url': url, 'ssl.ca.location': ssl}
            return {'url': url}
        except KeyError:
            print(
                '[KafkaAvroProducer] - [ERROR] - There is no SCHEMA_REGISTRY_URL environment variable'
            )
            exit(1)

    def getProducerConfiguration(self, groupID):
        try:
            options = {
                'bootstrap.servers': os.environ['KAFKA_BROKERS'],
                'group.id': groupID,
                'key.serializer': self.key_serializer,
                'value.serializer': self.value_serializer
            }
            if (os.getenv('KAFKA_PASSWORD', '') != ''):
                # Set security protocol common to ES on prem and on IBM Cloud
                options['security.protocol'] = 'SASL_SSL'
                # Depending on the Kafka User, we will know whether we are talking to ES on prem or on IBM Cloud
                # If we are connecting to ES on IBM Cloud, the SASL mechanism is plain
                if (os.getenv('KAFKA_USER', '') == 'token'):
                    options['sasl.mechanisms'] = 'PLAIN'
                # If we are connecting to ES on OCP, the SASL mechanism is scram-sha-512
                else:
                    options['sasl.mechanisms'] = 'SCRAM-SHA-512'
                # Set the SASL username and password
                options['sasl.username'] = os.getenv('KAFKA_USER', '')
                options['sasl.password'] = os.getenv('KAFKA_PASSWORD', '')
            # If we are talking to ES on prem, it uses an SSL self-signed certificate.
            # Therefore, we need the CA public certificate for the SSL connection to happen.
            if (os.path.isfile(os.getenv('KAFKA_CERT', '/certs/es-cert.pem'))):
                options['ssl.ca.location'] = os.getenv('KAFKA_CERT',
                                                       '/certs/es-cert.pem')

            # Print out the producer configuration
            self.printProducerConfiguration(options)

            return options

        except KeyError as error:
            print(
                '[KafkaAvroProducer] - [ERROR] - A required environment variable does not exist: '
                + str(error))
            exit(1)

    def printProducerConfiguration(self, options):
        # Printing out producer config for debugging purposes
        print(
            "[KafkaAvroProducer] - This is the configuration for the producer:"
        )
        print(
            "[KafkaAvroProducer] - -------------------------------------------"
        )
        print('[KafkaAvroProducer] - Bootstrap Server:      {}'.format(
            options['bootstrap.servers']))
        print('[KafkaAvroProducer] - Schema Registry url:   {}'.format(
            self.schema_registry_conf['url'].split('@')[-1]))
        if (os.getenv('KAFKA_PASSWORD', '') != ''):
            # Obfuscate password
            if (len(options['sasl.password']) > 3):
                obfuscated_password = options['sasl.password'][
                    0] + "*****" + options['sasl.password'][
                        len(options['sasl.password']) - 1]
            else:
                obfuscated_password = "******"
            print('[KafkaAvroProducer] - Security Protocol:     {}'.format(
                options['security.protocol']))
            print('[KafkaAvroProducer] - SASL Mechanism:        {}'.format(
                options['sasl.mechanisms']))
            print('[KafkaAvroProducer] - SASL Username:         {}'.format(
                options['sasl.username']))
            print('[KafkaAvroProducer] - SASL Password:         {}'.format(
                obfuscated_password))
            if (os.path.isfile(os.getenv('KAFKA_CERT', '/certs/es-cert.pem'))):
                print('[KafkaAvroProducer] - SSL CA Location:       {}'.format(
                    options['ssl.ca.location']))
        print(
            "[KafkaAvroProducer] - -------------------------------------------"
        )

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """
        if err is not None:
            print(
                '[KafkaAvroProducer] - [ERROR] - Message delivery failed: {}'.
                format(err))
        else:
            print('[KafkaAvroProducer] - Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def publishEvent(self, key, value, topicName='kafka-avro-producer'):
        # Produce the Avro message
        self.producer.produce(topic=topicName,
                              value=value,
                              key=key,
                              on_delivery=self.delivery_report)
        # Flush
        self.producer.flush()
Example #23
def main(args):
    topic = args.topic
    schema_str = EventSchema

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_str, schema_registry_client)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    list_type = [{
        "grilleIdent": "Numero 123T",
        "codeRetourServiceMetier": "code 23432543",
        "referer": "1qsd",
        "browserVersion": "qsdqsd",
        "androidUDID": "qsdqsdqsd",
        "iosIDFA": "qdqsdqsd",
        "appVersion": "qsdqsdqsdqsd",
        "idTmx": "qsdqsdqsd"
    }, {
        "numeroCompteBeneficiaire": "Numero 123T",
        "codePaysResidence": "code 23432543",
        "codePaysResidenceIso": "code 23432543",
        "adresseBeneficiaire": "code 23432543",
        "nomCompletBeneficiaire": "code 23432543",
        "idListeBeneficiaire": "code 23432543",
        "idBeneficiaire": "code 23432543",
        "modeValidation": 34,
        "bicBeneficiaire": "code 23432543",
        "idTmx": "code 23432543"
    }]
    while True:
        x = random.choice([0, 1])

        eventHeader = {
            "eventId": str(uuid4()),
            "dateTimeRef": 1589364605654,
            "nomenclatureEv": "Event Header",
            "canal": 1,
            "media": 2,
            "schemaVersion": "v0",
            "headerVersion": "v2",
            "serveur": "s1",
            "acteurDeclencheur": {
                "adresseIP": "127.0.0.1",
                "idTelematique": str(uuid4()),
                "idPersonne": "zahir"
            }
        }
        value = {
            "EventHeader": eventHeader,
            "EventBusinessContext": list_type[x]
        }
        print(value)
        producer.produce(topic=topic,
                         key=str(uuid4()),
                         value=value,
                         on_delivery=delivery_report)
        producer.flush()
        time.sleep(0.1)
Example #24

import meal_pb2
from uuid import uuid4

from confluent_kafka import SerializingProducer
from confluent_kafka.serialization import StringSerializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.protobuf import ProtobufSerializer

topic = 'meal'
schema_registry_client = SchemaRegistryClient({'url': 'http://t620.lan:8081'})
protobuf_serializer = ProtobufSerializer(meal_pb2.Meal, schema_registry_client)

producer_conf = {
    'bootstrap.servers': 't620.lan:9092',
    'key.serializer': StringSerializer('utf_8'),
    'value.serializer': protobuf_serializer
}

producer = SerializingProducer(producer_conf)

producer.poll(0.0)

mybeer = meal_pb2.Meal.DrinkItems(drink_name="beer")
mywine = meal_pb2.Meal.DrinkItems(drink_name="wine")

meal = meal_pb2.Meal(name='pizza', drink=[mybeer, mywine])
# Smaller meal (for testing kafkajs, which seems to miss the drinks)
#meal = meal_pb2.Meal(name='pizza', drink=[])

producer.produce(topic=topic, key=str(uuid4()), value=meal)
producer.flush()
Example #25

def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": [ "name", "favorite_number", "favorite_color" ]
    }
    """
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    json_serializer = JSONSerializer(schema_registry_client, schema_str,
                                     user_to_dict)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': json_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_address = input("Enter address: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        address=user_address,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #26
class KafkaAvroProducer:

    def __init__(self, producer_name, value_schema, groupID = 'KafkaAvroProducer',
                kafka_brokers = "", 
                kafka_user = "", 
                kafka_pwd = "", 
                kafka_cacert = "", 
                kafka_sasl_mechanism = "", 
                topic_name = ""):
        self.kafka_brokers = kafka_brokers
        self.kafka_user = kafka_user
        self.kafka_pwd = kafka_pwd
        self.kafka_sasl_mechanism = kafka_sasl_mechanism
        self.kafka_cacert = kafka_cacert
        self.topic_name = topic_name
        # Consumer name for logging purposes
        self.logging_prefix = '['+ producer_name + '][KafkaAvroProducer]'
        # Schema Registry configuration
        self.schema_registry_conf = {'url': config.SCHEMA_REGISTRY_URL}
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        print(value_schema)
        print(type(value_schema))
        value_schema=value_schema.strip()
        self.value_serializer = AvroSerializer(value_schema, self.schema_registry_client)
        
        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID,
                        self.key_serializer,
                        self.value_serializer)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)



    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """
        if err is not None:
            print('[KafkaAvroProducer] - [ERROR] - Message delivery failed: {}'.format(err))
        else:
            print('[KafkaAvroProducer] - Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))

    def publishEvent(self, key, value, topicName = 'kafka-avro-producer'):
        # Produce the Avro message
        self.producer.produce(topic=topicName, value=value, key=key, on_delivery=self.delivery_report)
        # Flush
        self.producer.flush()


    def getProducerConfiguration(self,groupID,key_serializer,value_serializer):
        try:
            options ={
                    'bootstrap.servers': os.environ['KAFKA_BROKERS'],
                    'group.id': groupID,
                    'key.serializer': key_serializer,
                    'value.serializer': value_serializer
            }
            if (os.getenv('KAFKA_PASSWORD','') != ''):
                # Set security protocol common to ES on prem and on IBM Cloud
                options['security.protocol'] = 'SASL_SSL'
                # Depending on the Kafka User, we will know whether we are talking to ES on prem or on IBM Cloud
                # If we are connecting to ES on IBM Cloud, the SASL mechanism is plain
                if (os.getenv('KAFKA_USER','') == 'token'):
                    options['sasl.mechanisms'] = 'PLAIN'
                # If we are connecting to ES on OCP, the SASL mechanism is scram-sha-512
                else:
                    options['sasl.mechanisms'] = 'SCRAM-SHA-512'
                # Set the SASL username and password
                options['sasl.username'] = os.getenv('KAFKA_USER','')
                options['sasl.password'] = os.getenv('KAFKA_PASSWORD','')
            # If we are talking to ES on prem, it uses an SSL self-signed certificate.
            # Therefore, we need the CA public certificate for the SSL connection to happen.
            if (os.path.isfile(os.getenv('KAFKA_CERT','/certs/es-cert.pem'))):
                options['ssl.ca.location'] = os.getenv('KAFKA_CERT','/certs/es-cert.pem')
            return options

        except KeyError as error:
            print('[KafkaAvroProducer] - [ERROR] - A required environment variable does not exist: ' + str(error))
            return {}
Example #27
# Load HopsWorks Kafka configuration
conf = toml.load('config.toml')
# Initialize a simple String serializer for the key
string_serializer = StringSerializer('utf_8')

producer_conf = {
    'bootstrap.servers': conf['hops']['url'] + ':' + conf['kafka']['port'],
    'security.protocol': 'SSL',
    'ssl.ca.location': conf['project']['ca_file'],
    'ssl.certificate.location': conf['project']['certificate_file'],
    'ssl.key.location': conf['project']['key_file'],
    'ssl.key.password': conf['project']['key_password'],
    'key.serializer': string_serializer,
    'value.serializer': string_serializer,
    'client.id': socket.gethostname()
}

print(producer_conf)

producer = SerializingProducer(producer_conf)

producer.produce(conf['kafka']['topic'],
                 key="key",
                 value="value",
                 on_delivery=acked)

# Wait up to 1 second for events. Callbacks will be invoked during
# this method call if the message is acknowledged.
producer.poll(1)
Example #28
def main(
    name: str,
    shutdown: multiprocessing.Value,
    request_queue: multiprocessing.Queue,
    config: Config
) -> None:
    """Execute tasks forever.

    This method is the entrypoint for the worker which executes the monitoring
    tasks. It is executed in a dedicated child process.
    """
    if config.verbose:
        logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(name)
    log.info(f"Starting process {name}.")

    # SIGINT will be delivered to the whole process group. We'll need to ignore
    # it in the worker processes to give them the opportunity to finish any
    # pending work.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    schema_registry_client = SchemaRegistryClient({
        'url': config.schema_registry
    })
    avro_serializer = AvroSerializer(
        Report.SCHEMA,
        schema_registry_client,
        Report.asdict
    )

    producer = SerializingProducer({
        'client.id': name,
        'bootstrap.servers': config.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'security.protocol': 'SSL',
        'ssl.key.location': config.auth_key,
        'ssl.certificate.location': config.auth_cert,
        'ssl.ca.location': config.ca_cert,
        'value.serializer': avro_serializer,
    })
    err = _report_error(log)

    while not shutdown.value:
        producer.poll(0.0)
        try:
            now = datetime.now()
            req = request_queue.get(timeout=1)
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, req)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.WRITEFUNCTION, len)
            try:
                curl.perform()
                report = Report(
                    timestamp=now.timestamp(),
                    url=req,
                    code=int(curl.getinfo(pycurl.RESPONSE_CODE)),
                    namelookup=curl.getinfo(pycurl.NAMELOOKUP_TIME),
                    connect=curl.getinfo(pycurl.CONNECT_TIME),
                    appconnect=curl.getinfo(pycurl.APPCONNECT_TIME),
                    pretransfer=curl.getinfo(pycurl.PRETRANSFER_TIME),
                    starttransfer=curl.getinfo(pycurl.STARTTRANSFER_TIME),
                    total=curl.getinfo(pycurl.TOTAL_TIME),
                )
                log.info(str(report))
                producer.produce(
                    topic=config.topic,
                    key=req,
                    value=report,
                    on_delivery=err
                )
            except TypeError:
                # It'll never work if we misconfigure PycURL.
                raise
            except pycurl.error as exc:
                # TODO: Record the failure in Kafka.
                log.warning(f"Failed to retrieve {req}", exc)
            # TODO: Handle exceptions from the Kafka Producer.
            finally:
                curl.close()
        except queue.Empty:
            log.debug("No request to process.")
    # Flush any results that haven't been committed yet.
    log.warning(f"Process {name} shutting down.")
    producer.flush()
Example #29
class Broker:
    def __init__(self, consumer_topic, producer_topic, client_id,
                 bootstrap_servers, consumer_proto_class, producer_proto_class,
                 processor, max_thread_calls):
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.client_id = client_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_proto_class = consumer_proto_class
        self.producer_proto_class = producer_proto_class
        self.processor = processor
        self.max_thread_calls = max_thread_calls

        self.kafka_consumer = DeserializingConsumer({
            'bootstrap.servers':
            self.bootstrap_servers,
            'group.id':
            self.client_id,
            'auto.offset.reset':
            "earliest",
            'value.deserializer':
            self.derializer
        })
        self.kafka_consumer.subscribe([self.consumer_topic])

        self.kafka_producer = SerializingProducer({
            'bootstrap.servers':
            self.bootstrap_servers,
            'queue.buffering.max.messages':
            500000,
            'value.serializer':
            self.serialize
        })

        self.thread_queue = deque(maxlen=self.max_thread_calls)
        self.latest_thread_queue_id = 1

    def derializer(self, bytes_message, _):
        message = image_pb2.ImageInfo()
        message.ParseFromString(bytes_message)
        return message

    def serialize(self, message, _):
        return message.SerializeToString()

    def get_thread_id(self):
        result = self.latest_thread_queue_id
        if result == self.max_thread_calls:
            self.latest_thread_queue_id = 1
        else:
            self.latest_thread_queue_id += 1
        return result

    def is_thread_queue_full(self):
        return len(self.thread_queue) == self.max_thread_calls

    def produce_when_ready(self, thread_id, message):
        while self.thread_queue[-1] != thread_id:
            logging.warning("Thread {} got stuck in queue".format(thread_id))
            # time.sleep(0.01)
        self.kafka_producer.poll(0.0)
        self.kafka_producer.produce(topic=self.producer_topic, value=message)
        self.thread_queue.pop()

    def call_processor(self, thread_id, value, start_time):
        result = self.processor.process(value)
        self.produce_when_ready(thread_id, result)
        logging.debug("Total time for thead" + str(thread_id) + " is " +
                      str(time.time() - start_time / 1000))

    def run(self):
        while True:
            try:
                if self.is_thread_queue_full():
                    logging.warning(
                        "Thread queue is full, waiting for previous threads to finished"
                    )
                    continue

                msg = self.kafka_consumer.poll(1.0)
                if msg is None or msg.value() is None:
                    logging.warning("No messages from kafka")
                    continue

                caller_thread_id = self.get_thread_id()
                caller_thread = threading.Thread(target=self.call_processor,
                                                 args=(caller_thread_id,
                                                       msg.value(),
                                                       msg.timestamp()[1]))
                self.thread_queue.appendleft(caller_thread_id)
                caller_thread.start()

            except KeyboardInterrupt:
                break

        self.kafka_consumer.close()
        self.kafka_producer.flush()
Example #30
    # Optional per-message delivery callback (triggered by poll() or flush()) when a message
    # has been successfully delivered or permanently failed delivery (after retries).
    def acked(err, msg):
        """Delivery report handler called on
        successful or failed delivery of message
        """
        global delivered_records
        if err is not None:
            print("Failed to deliver message: {}".format(err))
        else:
            delivered_records += 1
            print("Produced record to topic {} partition [{}] @ offset {}".
                  format(msg.topic(), msg.partition(), msg.offset()))

    for n in range(10):
        name_object = ccloud_lib.Name()
        name_object.name = "alice"
        count_object = ccloud_lib.Count()
        count_object.count = n
        print("Producing Avro record: {}\t{}".format(name_object.name,
                                                     count_object.count))
        producer.produce(topic=topic,
                         key=name_object,
                         value=count_object,
                         on_delivery=acked)
        producer.poll(0)

    producer.flush()

    print("{} messages were produced to topic {}!".format(
        delivered_records, topic))