def __init__(self, driver, nameSalt):
        self.driver = driver
        self.fileName = "travis_correct_confluent_protobuf_protobuf"
        self.topic = self.fileName + nameSalt

        self.sensor = sensor_pb2.SensorReading()
        self.sensor.dateTime = 1234
        self.sensor.reading = 321.321
        self.sensor.device.deviceID = "555-4321"
        self.sensor.device.enabled = True

        self.sensor.float_val = 4321.4321
        self.sensor.int32_val = (1 << 31) - 1
        self.sensor.sint32_val = (1 << 31) - 1
        self.sensor.sint64_val = (1 << 63) - 1
        self.sensor.uint32_val = (1 << 32) - 1

        self.sensor.bytes_val = b'\xDE\xAD'
        self.sensor.double_array_val.extend([1 / 3, 32.21, 434324321])
        self.sensor.uint64_val = (1 << 64) - 1

        self.schema_registry_client = SchemaRegistryClient(
            {'url': driver.schemaRegistryAddress})
        self.keyProtobufSerializer = ProtobufSerializer(
            sensor_pb2.SensorReading, self.schema_registry_client)
        self.valueProtobufSerializer = ProtobufSerializer(
            sensor_pb2.SensorReading, self.schema_registry_client)
        producer_conf = {
            'bootstrap.servers': driver.kafkaAddress,
            'key.serializer': self.keyProtobufSerializer,
            'value.serializer': self.valueProtobufSerializer
        }

        self.protobufProducer = SerializingProducer(producer_conf)
def main(args):
    topic = args.topic
    delimiter = args.delimiter
    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': StringSerializer('utf_8'),
                     'value.serializer': StringSerializer('utf_8')}

    producer_conf.update(sasl_conf(args))

    producer = SerializingProducer(producer_conf)

    print("Producing records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            msg_data = input(">")
            msg = msg_data.split(delimiter)
            if len(msg) == 2:
                producer.produce(topic=topic, key=msg[0], value=msg[1],
                                 on_delivery=delivery_report)
            else:
                producer.produce(topic=topic, value=msg[0],
                                 on_delivery=delivery_report)
        except KeyboardInterrupt:
            break

    print("\nFlushing {} records...".format(len(producer)))
    producer.flush()
Example #3
    def __init__(self, config: KafkaSinkConfig, ctx):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()

        schema_registry_conf = {
            'url': self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        avro_serializer = AvroSerializer(SCHEMA_JSON_STR,
                                         schema_registry_client,
                                         to_dict=convert_mce_to_dict)

        producer_config = {
            "bootstrap.servers": self.config.connection.bootstrap,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': avro_serializer,
            **self.config.connection.producer_config,
        }

        self.producer = SerializingProducer(producer_config)
    def __init__(self,
                 producer_name,
                 value_schema,
                 groupID='KafkaAvroProducer'):

        # Consumer name for logging purposes
        self.logging_prefix = '[' + producer_name + '][KafkaAvroProducer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = EventBackboneConfig.getProducerConfiguration(
            groupID, self.key_serializer, self.value_serializer)
        EventBackboneConfig.printProducerConfiguration(
            self.logging_prefix, self.producer_conf,
            self.schema_registry_conf['url'])
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
Example #5
    def __init__(self, producer_name, value_schema, groupID = 'KafkaAvroProducer',
                kafka_brokers = "", 
                kafka_user = "", 
                kafka_pwd = "", 
                kafka_cacert = "", 
                kafka_sasl_mechanism = "", 
                topic_name = ""):
        self.kafka_brokers = kafka_brokers
        self.kafka_user = kafka_user
        self.kafka_pwd = kafka_pwd
        self.kafka_sasl_mechanism = kafka_sasl_mechanism
        self.kafka_cacert = kafka_cacert
        self.topic_name = topic_name
        # Consumer name for logging purposes
        self.logging_prefix = '['+ producer_name + '][KafkaAvroProducer]'
        # Schema Registry configuration
        self.schema_registry_conf = {'url': config.SCHEMA_REGISTRY_URL}
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        print(value_schema)
        print(type(value_schema))
        value_schema=value_schema.strip()
        self.value_serializer = AvroSerializer(value_schema, self.schema_registry_client)
        
        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID,
                        self.key_serializer,
                        self.value_serializer)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
Example #6
def plain_avro_producer(running_cluster_config: Dict[str, str],
                        topic_and_partitions: Tuple[str, int],
                        records) -> SerializingProducer:
    """
    Creates a `SerializingProducer` configured with Avro key and value serializers that can be used to publish messages.
    """
    topic_id, _ = topic_and_partitions

    key, value = records[0]

    schema_registry_client = SchemaRegistryClient(
        {"url": running_cluster_config["schema-registry"]})
    key_schema = to_key_schema(key)
    avro_key_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client, schema_str=key_schema)
    value_schema = to_value_schema(value)
    avro_value_serializer = AvroSerializer(
        schema_registry_client=schema_registry_client, schema_str=value_schema)

    producer_config = {
        "bootstrap.servers": running_cluster_config["broker"],
        "key.serializer": avro_key_serializer,
        "value.serializer": avro_value_serializer,
    }

    producer = SerializingProducer(producer_config)

    producer.produce = partial(producer.produce, topic=topic_id)

    return producer
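
A brief usage sketch for the fixture above (the cluster config, topic tuple, and records are placeholders): because produce() was partially bound to the topic, callers only pass key and value.

# Hypothetical usage of plain_avro_producer(); the fixture already bound the
# topic via functools.partial, so only key/value are supplied here.
producer = plain_avro_producer(running_cluster_config, ("test-topic", 1), records)
for key, value in records:
    producer.produce(key=key, value=value)
producer.flush()  # block until every queued record has been delivered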
Example #7
    def __init__(self, config: KafkaEmitterConfig):
        self.config = config

        schema_registry_conf = {
            "url": self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        avro_serializer = AvroSerializer(
            schema_str=SCHEMA_JSON_STR,
            schema_registry_client=schema_registry_client,
            to_dict=convert_mce_to_dict,
        )

        producer_config = {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": avro_serializer,
            **self.config.connection.producer_config,
        }

        self.producer = SerializingProducer(producer_config)
Example #8
    def __init__(self, consumer_topic, producer_topic, client_id,
                 bootstrap_servers, consumer_proto_class, producer_proto_class,
                 processor, max_thread_calls):
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.client_id = client_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_proto_class = consumer_proto_class
        self.producer_proto_class = producer_proto_class
        self.processor = processor
        self.max_thread_calls = max_thread_calls

        self.kafka_consumer = DeserializingConsumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.client_id,
            'auto.offset.reset': "earliest",
            'value.deserializer': self.derializer
        })
        self.kafka_consumer.subscribe([self.consumer_topic])

        self.kafka_producer = SerializingProducer({
            'bootstrap.servers': self.bootstrap_servers,
            'queue.buffering.max.messages': 500000,
            'value.serializer': self.serialize
        })

        self.thread_queue = deque(maxlen=self.max_thread_calls)
        self.latest_thread_queue_id = 1
Example #9
def main(args):
    topic = args.topic
    delimiter = args.delimiter

    producer_conf = producer_config(args)

    producer = SerializingProducer(producer_conf)

    print('Producing records to topic {}. ^C to exit.'.format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            msg_data = input(">")
            msg = msg_data.split(delimiter)
            if len(msg) == 2:
                producer.produce(topic=topic, key=msg[0], value=msg[1],
                                 on_delivery=delivery_report)
            else:
                producer.produce(topic=topic, value=msg[0],
                                 on_delivery=delivery_report)
        except KeyboardInterrupt:
            break

    print('\nFlushing {} records...'.format(len(producer)))
    producer.flush()
Example #10
class ProtoKafkaProducer:
    def __init__(self, config_env):
        self.config = config_env
        self.topic_name = self.config["kafka_produce_topic"]

        conf = {
            'bootstrap.servers': self.config["bootstrap_servers"],
            'message.max.bytes': self.config["kafkaMaxMessageBytes"],
            'queue.buffering.max.ms': self.config["queue.buffering.max.ms"],
            'queue.buffering.max.messages': self.config["queue.buffering.max.messages"],
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': self.__protobuf_serializer()
        }

        self.producer = SerializingProducer(conf)

    def on_delivery(self, err, msg):
        if err:
            print("Message failed delivery, error: %s" % err)
        else:
            print("Message delivered to %s on partition %s" %
                  (msg.topic(), msg.partition()))

    def __protobuf_serializer(self):
        schema_registry_conf = {'url': self.config['schemaregistry.url']}
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        _proto_conf = {
            'auto.register.schemas': self.config['auto.register.schemas'],
        }

        return ProtobufSerializer(self.config['proto_msg_type'],
                                  schema_registry_client,
                                  conf=_proto_conf)

    def produce(self, kafka_msg, kafka_key):
        try:
            self.producer.produce(topic=self.topic_name,
                                  value=kafka_msg,
                                  key=kafka_key,
                                  on_delivery=self.on_delivery)

            self.producer.flush()

        except Exception as e:
            print("Error during producing to kafka topic. Stacktrace is %s" % e)
Example #11
class NewsScheduler(object):
    def __init__(self,
                 bootstrap_servers,
                 rss_feeds,
                 topic='crawl-queue',
                 time_checkpoint_fn_base='scheduler_checkpoint'):
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic

        self.feeds = rss_feeds

        self.time_checkpoints = dict()
        for spider_name in self.feeds.values():
            fn = f'{time_checkpoint_fn_base}_{spider_name}.txt'
            fn = fn.replace('/', '_')  # we don't want / in our pathnames.
            self.time_checkpoints[spider_name] = TimeCheckpoint(fn=fn)

        producer_conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': _json_serializer_wrapper
        }
        self.producer = SerializingProducer(producer_conf)

    def process_feed(self, feed_url, spider_name, flush=False):
        log.info(f"Processing feed '{feed_url}' via topic '{self.topic}'.")
        rss_feed = feedparser.parse(feed_url)

        for item in rss_feed.entries:
            item['spider'] = spider_name
            item_updated_time = struct_time_to_datetime(item.updated_parsed)

            if item_updated_time > self.time_checkpoints[spider_name].checkpoint:
                log.info(f"New item: {item['title']}")

                self.producer.produce(topic=self.topic,
                                      key=str(uuid4()),
                                      value=dict(item))

        self.time_checkpoints[spider_name].checkpoint = struct_time_to_datetime(
            rss_feed.feed.updated_parsed)

        if flush:
            self.producer.flush()

    def run_loop(self, interval):
        for feed, spider in itertools.cycle(self.feeds.items()):
            self.process_feed(feed, spider)
            time.sleep(interval)
Example #12
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 value_serializer=None,
                 config=None):

        producer_config = {
            "bootstrap.servers": bootstrap_servers,
            "value.serializer": value_serializer
        }
        if config:
            producer_config.update(config)

        self.producer = SerializingProducer(producer_config)
        self.topic = topic
class KafkaAvroProducer:
    def __init__(self,
                 producer_name,
                 value_schema,
                 groupID='KafkaAvroProducer'):

        # Consumer name for logging purposes
        self.logging_prefix = '[' + producer_name + '][KafkaAvroProducer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = EventBackboneConfig.getProducerConfiguration(
            groupID, self.key_serializer, self.value_serializer)
        EventBackboneConfig.printProducerConfiguration(
            self.logging_prefix, self.producer_conf,
            self.schema_registry_conf['url'])
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """
        if err is not None:
            print(
                '[KafkaAvroProducer] - [ERROR] - Message delivery failed: {}'.
                format(err))
        else:
            print('[KafkaAvroProducer] - Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def publishEvent(self, key, value, topicName='kafka-avro-producer'):
        # Produce the Avro message
        self.producer.produce(topic=topicName,
                              value=value,
                              key=key,
                              on_delivery=self.delivery_report)
        # Flush
        self.producer.flush()
    def create_producer(self, registry_client):
        """
        Creates a SerializingProducer object to produce to kafka topic

        :param registry_client: SchemaRegistryClient
            get this from register_client()

        :return: SerializingProducer Object
            based on config values
        """
        metadata_schema = None

        if self.metadata_type == "COLLECTION":
            metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str

        if self.metadata_type == "GRANULE":
            metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str

        metadata_serializer = AvroSerializer(metadata_schema, registry_client)
        producer_conf = {'bootstrap.servers': self.brokers}

        if self.security:
            producer_conf['security.protocol'] = 'SSL'
            producer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
            producer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
            producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']

        meta_producer_conf = producer_conf
        meta_producer_conf['value.serializer'] = metadata_serializer

        metadata_producer = SerializingProducer(meta_producer_conf)
        return metadata_producer
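
A hedged sketch of how create_producer() might be called, following its docstring; register_client(), the payload dict, and the delivery_report callback are assumptions, not part of the class shown above.

# Hypothetical wiring (would live inside the same class as create_producer();
# register_client(), payload and delivery_report are placeholders).
registry_client = self.register_client()          # a configured SchemaRegistryClient
metadata_producer = self.create_producer(registry_client)
metadata_producer.produce(topic=self.collection_topic,
                          key=str(uuid4()),
                          value=payload,          # dict matching the registered Avro schema
                          on_delivery=delivery_report)
metadata_producer.flush()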
    def producer(self, conf=None, key_serializer=None, value_serializer=None):
        """
        Returns a producer bound to this cluster.

        Args:
            conf (dict): Producer configuration overrides

            key_serializer (Serializer): serializer to apply to message key

            value_serializer (Serializer): serializer to apply to
                message value

        Returns:
            Producer: A new SerializingProducer instance

        """
        client_conf = self.client_conf(conf)

        if key_serializer is not None:
            client_conf['key.serializer'] = key_serializer

        if value_serializer is not None:
            client_conf['value.serializer'] = value_serializer

        return SerializingProducer(client_conf)
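
A minimal usage sketch for the cluster helper above; the fixture instance, topic name, and the 'linger.ms' override are illustrative only.

# Hypothetical call against a `cluster` instance of the fixture class above:
# override one librdkafka setting and supply explicit serializers.
producer = cluster.producer(conf={'linger.ms': 5},
                            key_serializer=StringSerializer('utf_8'),
                            value_serializer=StringSerializer('utf_8'))
producer.produce('test-topic', key='k1', value='v1')
producer.flush()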
Example #16
    def __init__(self,
                 topic='test',
                 client_id='producer1',
                 bootstrap_servers='localhost:9092',
                 video_reader=None):
        self.topic = topic
        self.video_reader = video_reader
        self.kafka_producer = SerializingProducer({
            'bootstrap.servers': bootstrap_servers,
            'value.serializer': self.video_reader.serialize,
            'queue.buffering.max.messages': 500000
        })
        self.delivered_records = 0
        self.start_time = 0
    def __new__(cls):
        # Producer configuration. Must match Strimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': StringSerializer('utf_8')
        }
        return SerializingProducer(config)
    def _init_producer(config: Dict) -> SerializingProducer:
        """config must contain:
            'bootstrap.servers'
            'value.serializer'
        but may contain every other kafka setting as well
        """
        assert "bootstrap.servers" in config.keys()
        assert "value.serializer" in config.keys()
        return SerializingProducer(config)
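
A minimal config that satisfies the two asserts in _init_producer() above; the broker address and serializer choice are placeholders.

# Example input for _init_producer(): both required keys are present, and any
# other librdkafka setting could be added alongside them.
producer = _init_producer({
    "bootstrap.servers": "localhost:9092",            # placeholder broker
    "value.serializer": StringSerializer("utf_8"),    # any Serializer works here
})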
    def __init__(self, value_schema, groupID='KafkaAvroProducer'):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_registry_client, schema_str,
                                     user_to_dict)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_address = input("Enter address: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        address=user_address,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #21
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)

        except KeyboardInterrupt:
            break
        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
Example #22
    def __init__(self, config_env):
        self.config = config_env
        self.topic_name = self.config["kafka_produce_topic"]

        conf = {
            'bootstrap.servers': self.config["bootstrap_servers"],
            'message.max.bytes': self.config["kafkaMaxMessageBytes"],
            'queue.buffering.max.ms': self.config["queue.buffering.max.ms"],
            'queue.buffering.max.messages': self.config["queue.buffering.max.messages"],
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': self.__protobuf_serializer()
        }

        self.producer = SerializingProducer(conf)
Example #23
def produce(goal):
    count = 0
    reusableProducer = SerializingProducer(getConfigs())
    while (count < goal):
        try:
            reusableProducer.produce(topic='myprototopic',
                                     key=str(uuid4()),
                                     value=generateRecord(),
                                     on_delivery=getReport)
            # print("In process:{}".format(multiprocessing.current_process().name))
            reusableProducer.poll(0.0)
        except KeyboardInterrupt:
            break
        except BufferError:
            sys.stderr.write(
                '%% Local producer queue is full (%d messages awaiting delivery): flushing...\n'
                % len(reusableProducer))
            reusableProducer.flush()

    print("Flushing one producer thread")
    reusableProducer.flush()
def write_to_kafka(bootstrap_servers, schema_registry_url, topic_name, data):

    print("Kafka Version                : ", confluent_kafka.version(),confluent_kafka.libversion())

    schema_registry_conf = {'url': schema_registry_url}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    value_avro_serializer = AvroSerializer(schemas.weather_source_schema, schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    conf = {'bootstrap.servers': bootstrap_servers,
            'client.id': socket.gethostname(),
            'on_delivery': delivery_report,
            'key.serializer': string_serializer,
            'value.serializer': value_avro_serializer    
            }

    avroProducer = SerializingProducer(conf)
    
    key = str(datetime.date.today()) + '~' + str(data['lat']) + '~' + str(data['lon'])
    message = json.dumps(data, cls=DatetimeEncoder)

    print("Key Type                     : ", type(key))
    print("Value Type                   : ", type(json.loads(message)))
  
    avroProducer.produce(topic=topic_name, key=key, value=json.loads(message))
    avroProducer.flush()
Example #25
    def _commit(self) -> None:
        """
        Retrieves the current offset by calling :meth:`pyconnect.pyconnectsource.PyConnectSource.get_index` and
        publishes it to the offset topic that is defined in this sources :class:`pyconnect.config.SourceConfig`
        instance.
        """
        idx = self.get_index()
        idx_schema = to_value_schema(idx)
        avro_value_serializer = AvroSerializer(
            schema_registry_client=self.schema_registry_client,
            schema_str=idx_schema)

        producer_config = {
            "bootstrap.servers": self.config["bootstrap.servers"],
            "key.serializer": None,
            "value.serializer": avro_value_serializer,
            **self.config["kafka_opts"],
            **self.config["kafka_producer_opts"],
        }

        offset_producer = SerializingProducer(producer_config)
        offset_producer.produce(key=None,
                                value=idx,
                                topic=self.config["offset_topic"])
        offset_producer.flush()
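
For illustration, a hedged sketch of the producer-related configuration keys this _commit() reads; the key names come from the snippet above, while all values are placeholders.

# Placeholder config for the source above; only the keys used by _commit()
# are shown, with illustrative values.
config = {
    "bootstrap.servers": "localhost:9092",
    "offset_topic": "_my_source_offsets",             # placeholder topic name
    "kafka_opts": {"security.protocol": "PLAINTEXT"},
    "kafka_producer_opts": {"linger.ms": 5},
}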
Example #26
    def __init__(self, config: KafkaEmitterConfig):
        self.config = config
        schema_registry_conf = {
            "url": self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent,
                                ctx: SerializationContext) -> dict:
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        mce_avro_serializer = AvroSerializer(
            schema_str=getMetadataChangeEventSchema(),
            schema_registry_client=schema_registry_client,
            to_dict=convert_mce_to_dict,
        )

        def convert_mcp_to_dict(
            mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
            ctx: SerializationContext,
        ) -> dict:
            tuple_encoding = mcp.to_obj(tuples=True)
            return tuple_encoding

        mcp_avro_serializer = AvroSerializer(
            schema_str=getMetadataChangeProposalSchema(),
            schema_registry_client=schema_registry_client,
            to_dict=convert_mcp_to_dict,
        )

        # We maintain a map of producers for each kind of event
        producers_config = {
            MCE_KEY: {
                "bootstrap.servers": self.config.connection.bootstrap,
                "key.serializer": StringSerializer("utf_8"),
                "value.serializer": mce_avro_serializer,
                **self.config.connection.producer_config,
            },
            MCP_KEY: {
                "bootstrap.servers": self.config.connection.bootstrap,
                "key.serializer": StringSerializer("utf_8"),
                "value.serializer": mcp_avro_serializer,
                **self.config.connection.producer_config,
            },
        }

        self.producers = {
            key: SerializingProducer(value)
            for (key, value) in producers_config.items()
        }
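
A short, hypothetical emit path for the producer map above: the event type selects the producer, while the topic name and callback are supplied by the caller (the helper and names below are assumptions, not part of the emitter shown).

# Hypothetical helper on the same emitter class: route an event to the
# producer registered for its type.
def _emit(self, event, topic, callback):
    key = MCE_KEY if isinstance(event, MetadataChangeEvent) else MCP_KEY
    self.producers[key].produce(topic=topic,
                                key=str(uuid4()),
                                value=event,
                                on_delivery=callback)
    self.producers[key].poll(0)  # serve delivery callbacks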
Example #27
    def __init__(self,
                 bootstrap_servers,
                 rss_feeds,
                 topic='crawl-queue',
                 time_checkpoint_fn_base='scheduler_checkpoint'):
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic

        self.feeds = rss_feeds

        self.time_checkpoints = dict()
        for spider_name in self.feeds.values():
            fn = f'{time_checkpoint_fn_base}_{spider_name}.txt'
            fn = fn.replace('/', '_')  # we don't want / in our pathnames.
            self.time_checkpoints[spider_name] = TimeCheckpoint(fn=fn)

        producer_conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': _json_serializer_wrapper
        }
        self.producer = SerializingProducer(producer_conf)
Example #28
def send_record(args):
    """ Sends Record using a SerializingProducer & AvroSerializer """
    topic = args.topic.rstrip()

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_serializer = AvroSerializer(schema_registry_client, DATA_SCHEMA,
                                     data_to_dict)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "key.serializer": StringSerializer('utf_8'),
        "value.serializer": avro_serializer
    }
    producer = SerializingProducer(producer_config)

    split_incoming_data = args.record_value.split(',')
    if len(split_incoming_data) != 7:  # Data Format Check
        print('** Error: Unexpected Number of Fields in Incoming Data: ', split_incoming_data)
        raise Exception
    try:  # Data Format Check
        incoming_data = {
            'envId': int(split_incoming_data[0]),
            'whenCollected': str(split_incoming_data[1]),
            'timeLightOnMins': int(split_incoming_data[2]),
            'humidity': int(split_incoming_data[3]),
            'soilMoisture': int(split_incoming_data[4]),
            'temperature': int(split_incoming_data[5]),
            'waterConsumption': int(split_incoming_data[6])
        }
    except Exception as error:
        print('** Error Creating Dict of Data: ', error)

    print(f'Producing data records to topic {topic}. ^C to exit.')
    producer.poll(1)
    try:
        key = args.record_key if args.record_key else str(uuid4())
        data_object = Data(incoming_data)
        print('\t-Producing Avro record. . .')
        producer.produce(topic=topic,
                         key=key,
                         value=data_object,
                         on_delivery=delivery_report)
    except ValueError:
        print('\t-Invalid input, discarding record. . .')
    print('\nFlushing records. . .')
    producer.flush()
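
A possible argparse wrapper for send_record(); the option names mirror the attributes the function reads above, and the help text is illustrative.

# Hypothetical CLI wiring for send_record(); argument names match the
# attributes read above (topic, schema_registry, bootstrap_servers,
# record_key, record_value).
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Produce a single Avro record')
    parser.add_argument('--topic', required=True)
    parser.add_argument('--schema_registry', required=True)
    parser.add_argument('--bootstrap_servers', required=True)
    parser.add_argument('--record_key', default=None)
    parser.add_argument('--record_value', required=True,
                        help='7 comma-separated fields: envId,whenCollected,...')
    send_record(parser.parse_args())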
Example #29
class Producer:
    def __init__(self,
                 bootstrap_servers: str,
                 topic: str,
                 value_serializer=None,
                 config=None):

        producer_config = {
            "bootstrap.servers": bootstrap_servers,
            "value.serializer": value_serializer
        }
        if config:
            producer_config.update(config)

        self.producer = SerializingProducer(producer_config)
        self.topic = topic

    def send(self, key=None, value=None, on_delivery=default_callback):
        self.producer.produce(self.topic,
                              key=key,
                              value=value,
                              on_delivery=on_delivery)
        self.producer.flush()
Example #30
def avro_messages_producer(schema):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema, schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    producer_conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.serializer': string_serializer,
        'value.serializer': avro_serializer
    }

    return SerializingProducer(producer_conf)