def main(args):
    topic = args.topic
    delimiter = args.delimiter
    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': StringSerializer('utf_8'),
                     'value.serializer': StringSerializer('utf_8')}

    producer_conf.update(sasl_conf(args))

    producer = SerializingProducer(producer_conf)

    print("Producing records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            msg_data = input(">")
            msg = msg_data.split(delimiter)
            if len(msg) == 2:
                producer.produce(topic=topic, key=msg[0], value=msg[1],
                                 on_delivery=delivery_report)
            else:
                producer.produce(topic=topic, value=msg[0],
                                 on_delivery=delivery_report)
        except KeyboardInterrupt:
            break

    print("\nFlushing {} records...".format(len(producer)))
    producer.flush()
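
The example above relies on delivery_report and sasl_conf helpers defined elsewhere in the script; a minimal sketch of what they might look like (the argument attribute names are assumptions):

def delivery_report(err, msg):
    # Per-message delivery callback: report failure or the delivered offset.
    if err is not None:
        print("Delivery failed for record {}: {}".format(msg.key(), err))
    else:
        print("Record delivered to {} [{}] at offset {}".format(
            msg.topic(), msg.partition(), msg.offset()))


def sasl_conf(args):
    # Assumed helper: build SASL settings from command-line arguments.
    return {'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': args.sasl_username,
            'sasl.password': args.sasl_password}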
    def __new__(cls):
        # Producer configuration. Must match Strimzi/Kafka configuration.
        config = {
            'bootstrap.servers': "jizt-cluster-kafka-bootstrap:9092",
            'client.id': socket.gethostname(),
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': StringSerializer('utf_8')
        }
        return SerializingProducer(config)
    def __init__(self, config: KafkaEmitterConfig):
        self.config = config
        schema_registry_conf = {
            "url": self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent,
                                ctx: SerializationContext) -> dict:
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        mce_avro_serializer = AvroSerializer(
            schema_str=getMetadataChangeEventSchema(),
            schema_registry_client=schema_registry_client,
            to_dict=convert_mce_to_dict,
        )

        def convert_mcp_to_dict(
            mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
            ctx: SerializationContext,
        ) -> dict:
            tuple_encoding = mcp.to_obj(tuples=True)
            return tuple_encoding

        mcp_avro_serializer = AvroSerializer(
            schema_str=getMetadataChangeProposalSchema(),
            schema_registry_client=schema_registry_client,
            to_dict=convert_mcp_to_dict,
        )

        # We maintain a map of producers for each kind of event
        producers_config = {
            MCE_KEY: {
                "bootstrap.servers": self.config.connection.bootstrap,
                "key.serializer": StringSerializer("utf_8"),
                "value.serializer": mce_avro_serializer,
                **self.config.connection.producer_config,
            },
            MCP_KEY: {
                "bootstrap.servers": self.config.connection.bootstrap,
                "key.serializer": StringSerializer("utf_8"),
                "value.serializer": mcp_avro_serializer,
                **self.config.connection.producer_config,
            },
        }

        self.producers = {
            key: SerializingProducer(value)
            for (key, value) in producers_config.items()
        }
Example #4
def producer_config(args):
    logger = logging.getLogger(__name__)
    return {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': StringSerializer('utf_8'),
        'security.protocol': 'sasl_plaintext',
        'sasl.mechanisms': 'OAUTHBEARER',
        # sasl.oauthbearer.config can be used to pass argument to your oauth_cb
        # It is not used in this example since we are passing all the arguments
        # from command line
        # 'sasl.oauthbearer.config': 'not-used',
        'oauth_cb': functools.partial(_get_token, args),
        'logger': logger,
    }
Example #5
def send_event_to_message_bus(**kwargs):  # pragma: no cover
    """
    Forward a SUBSCRIPTION_LICENSE_MODIFIED event to the settings.LICENSE_TOPIC_NAME queue on the event bus

    :param kwargs: event data sent by signal
    """
    license_data = kwargs.get('license', None)
    if not license_data or not isinstance(license_data,
                                          SubscriptionLicenseData):
        logger.error(
            "Received null or incorrect data from SUBSCRIPTION_LICENSE_MODIFIED"
        )
        return
    try:
        license_event_producer = ProducerFactory.get_or_create_event_producer(
            settings.LICENSE_TOPIC_NAME, StringSerializer('utf-8'),
            SubscriptionLicenseEventSerializer.get_serializer())
        license_event_data = {"license": license_data}
        message_key = license_data.enterprise_customer_uuid
        event_type = kwargs['signal'].event_type
        license_event_producer.produce(
            settings.LICENSE_TOPIC_NAME,
            key=message_key,
            value=license_event_data,
            on_delivery=verify_event,
            headers={EVENT_TYPE_HEADER_KEY: event_type})
        license_event_producer.poll()
    except ValueSerializationError as vse:
        logger.exception(vse)
    except Exception:  # pylint: disable=broad-except
        logger.exception("Unknown error sending license event to event bus")
Example #6
    def __init__(self, config: KafkaSinkConfig, ctx):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()

        schema_registry_conf = {
            'url': self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        avro_serializer = AvroSerializer(SCHEMA_JSON_STR,
                                         schema_registry_client,
                                         to_dict=convert_mce_to_dict)

        producer_config = {
            "bootstrap.servers": self.config.connection.bootstrap,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': avro_serializer,
            **self.config.connection.producer_config,
        }

        self.producer = SerializingProducer(producer_config)
Example #7
def getConfigs():

    sr_client_props = {
        'url': '<CCLOUD_SR_DNS>',
        'basic.auth.user.info': '<CCLOUD_SR_KEY>:<CCLOUD_SR_SECRET>'
    }

    sr_client = SchemaRegistryClient(sr_client_props)
    value_serializer = ProtobufSerializer(FoodPreferences_pb2.PersonFood,
                                          sr_client)

    configs = {
        'bootstrap.servers': '<CCLOUD_DNS>',
        'security.protocol': 'SASL_SSL',
        'sasl.mechanism': 'PLAIN',
        'sasl.username': '******',
        'sasl.password': '******',
        'client.id': 'pythonProduction',
        'compression.type': 'zstd',
        'retries': '10',
        'linger.ms': '5',
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': value_serializer
    }

    return configs
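
A short usage sketch for getConfigs(); the topic name and the empty PersonFood message are placeholders, and real field values depend on the .proto definition:

producer = SerializingProducer(getConfigs())
value = FoodPreferences_pb2.PersonFood()  # placeholder: populate fields per the .proto schema
producer.produce(topic='<CCLOUD_TOPIC>', key='person-1', value=value)
producer.flush()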
    def __init__(self,
                 producer_name,
                 value_schema,
                 groupID='KafkaAvroProducer'):

        # Consumer name for logging purposes
        self.logging_prefix = '[' + producer_name + '][KafkaAvroProducer]'

        # Schema Registry configuration
        self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = EventBackboneConfig.getProducerConfiguration(
            groupID, self.key_serializer, self.value_serializer)
        EventBackboneConfig.printProducerConfiguration(
            self.logging_prefix, self.producer_conf,
            self.schema_registry_conf['url'])
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
def write_to_kafka(bootstrap_servers, schema_registry_url, topic_name, data):

    print("Kafka Version                : ", confluent_kafka.version(),confluent_kafka.libversion())

    schema_registry_conf = {'url': schema_registry_url}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    value_avro_serializer = AvroSerializer(schemas.weather_source_schema, schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    conf = {'bootstrap.servers': bootstrap_servers,
            'client.id': socket.gethostname(),
            'on_delivery': delivery_report,
            'key.serializer': string_serializer,
            'value.serializer': value_avro_serializer    
            }

    avroProducer = SerializingProducer(conf)
    
    key = str(datetime.date.today()) + '~' + str(data['lat']) + '~' + str(data['lon'])  # date must be converted to str before concatenation
    message = json.dumps(data, cls=DatetimeEncoder)

    print("Key Type                     : ", type(key))
    print("Value Type                   : ", type(json.loads(message)))
  
    avroProducer.produce(topic=topic_name, key=key, value=json.loads(message))
    avroProducer.flush()
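
The write_to_kafka() snippet depends on a DatetimeEncoder for json.dumps; a minimal sketch, assuming it only needs to render date/datetime values as ISO-8601 strings:

import datetime
import json

class DatetimeEncoder(json.JSONEncoder):
    # Assumed implementation: serialize date/datetime objects as ISO-8601 strings.
    def default(self, obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        return super().default(obj)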
Example #10
    def from_file(cls, config_file_path, **kwargs):
        """
        config_file_path = path to the config file to use in defining this producer

        Possible keyword arguments:
        logger = the logger object to use
        !!!!! any other keyword arguments will be added to the configuration (with underscores replaced with dots) !!!!!
        """
        parser = ConfigFileParser(config_file_path, **kwargs)
        configs = parser.get_config_dict_for_groups(['cluster', 'producer'])
        for argname, arg in kwargs.items():
            if argname == 'logger':
                continue
            configs[argname.replace('_', '.')] = arg
        # If one of several recognized serializers has been given as a config parameter for the key/value serializer, replace it with the actual class
        names_to_classes = {
            'DoubleSerializer': DoubleSerializer(),
            'IntegerSerializer': IntegerSerializer(),
            'StringSerializer': StringSerializer(),
            'DataFileChunkSerializer': DataFileChunkSerializer(),
        }
        configs_to_check = ['key.serializer', 'value.serializer']
        for cfg in configs_to_check:
            if cfg in configs.keys():
                if configs[cfg] in names_to_classes:
                    configs[cfg] = names_to_classes[configs[cfg]]
        return cls(configs)
Example #11
    def __init__(self, producer_name, value_schema, groupID = 'KafkaAvroProducer',
                kafka_brokers = "", 
                kafka_user = "", 
                kafka_pwd = "", 
                kafka_cacert = "", 
                kafka_sasl_mechanism = "", 
                topic_name = ""):
        self.kafka_brokers = kafka_brokers
        self.kafka_user = kafka_user
        self.kafka_pwd = kafka_pwd
        self.kafka_sasl_mechanism = kafka_sasl_mechanism
        self.kafka_cacert = kafka_cacert
        self.topic_name = topic_name
        # Consumer name for logging purposes
        self.logging_prefix = '['+ producer_name + '][KafkaAvroProducer]'
        # Schema Registry configuration
        self.schema_registry_conf = {'url': config.SCHEMA_REGISTRY_URL}
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        print(value_schema)
        print(type(value_schema))
        value_schema=value_schema.strip()
        self.value_serializer = AvroSerializer(value_schema, self.schema_registry_client)
        
        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID,
                        self.key_serializer,
                        self.value_serializer)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
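
The getProducerConfiguration() method is not shown; a plausible sketch built from the attributes captured in __init__ above (SASL settings are added only when credentials are supplied, and the exact option set is an assumption):

    def getProducerConfiguration(self, groupID, key_serializer, value_serializer):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'key.serializer': key_serializer,
            'value.serializer': value_serializer
        }
        if self.kafka_user:
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = self.kafka_sasl_mechanism
            options['sasl.username'] = self.kafka_user
            options['sasl.password'] = self.kafka_pwd
            options['ssl.ca.location'] = self.kafka_cacert
        return options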
Example #12
    def __init__(self, config: KafkaEmitterConfig):
        self.config = config

        schema_registry_conf = {
            "url": self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        avro_serializer = AvroSerializer(
            schema_str=SCHEMA_JSON_STR,
            schema_registry_client=schema_registry_client,
            to_dict=convert_mce_to_dict,
        )

        producer_config = {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": avro_serializer,
            **self.config.connection.producer_config,
        }

        self.producer = SerializingProducer(producer_config)
Example #13
def test_string_serialization(kafka_cluster, data, codec):
    """
    Tests basic unicode serialization/deserialization functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        data (unicode): input data

        codec (str): encoding type

    """
    topic = kafka_cluster.create_topic("serialization-string")

    producer = kafka_cluster.producer(value_serializer=StringSerializer(codec))

    producer.produce(topic, value=data)
    producer.flush()

    consumer = kafka_cluster.consumer(
        value_deserializer=StringDeserializer(codec))

    consumer.subscribe([topic])

    msg = consumer.poll()

    assert msg.value() == data

    consumer.close()
@dataclass  # assumed decorator: the field(default_factory=...) defaults below require a dataclass
class Configuration:

    commit_log_topic: str
    snapshot_topic: str
    bootstrap_servers: str
    group_id: str
    batch_timeout_sec: int = 5
    messages_per_transaction: int = 2000

    store_consumer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'group.id': None,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
            'enable.auto.offset.store': False,
            'enable.partition.eof': True,
            'key.deserializer': StringDeserializer(),
            'value.deserializer': JSONDeserializer(),
            # 'stats_cb': publish_statistics,
            # 'statistics.interval.ms': 15000,
        })

    consumer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'group.id': None,
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
            'enable.auto.offset.store': False,
            'enable.partition.eof': False,
            'key.deserializer': StringDeserializer(),
            'value.deserializer': JSONDeserializer(),
            # 'value.deserializer': DummyDeserializer(),
            # 'stats_cb': publish_statistics,
            # 'statistics.interval.ms': 15000,
        })

    producer: dict = field(
        default_factory=lambda: {
            'bootstrap.servers': None,
            'transactional.id': None,
            'transaction.timeout.ms': 60000,
            'enable.idempotence': True,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': JSONSerializer(),
            'debug': 'broker,eos',
        })

    def __post_init__(self):
        self.store_consumer['bootstrap.servers'] = \
            self.consumer['bootstrap.servers'] = \
            self.producer['bootstrap.servers'] = \
            self.bootstrap_servers

        self.store_consumer['group.id'] = \
            self.consumer['group.id'] = \
            self.producer['transactional.id'] = \
            self.group_id
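
A brief usage sketch for the Configuration dataclass; it assumes the module's own JSONSerializer/JSONDeserializer classes and uses placeholder topic and broker names:

config = Configuration(
    commit_log_topic='commit-log',
    snapshot_topic='snapshots',
    bootstrap_servers='localhost:9092',
    group_id='example-group')
producer = SerializingProducer(config.producer)
producer.init_transactions()  # required because transactional.id is set in __post_init__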
Example #15
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_str = EventSchema
    schema_enriched_event_str = EnrichedEventSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_enriched_event_str,
                                     schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "latest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    session.row_factory = dict_factory

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()
            enrich(evt, session, producer, outputtopic)

        except Exception:
            print('Exception', sys.exc_info()[0])
            continue

    consumer.close()
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_registry_client, schema_str,
                                     user_to_dict)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_address = input("Enter address: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        address=user_address,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
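
user_to_dict() is referenced above but not shown; a minimal sketch that maps the User object onto the Avro schema fields (the address collected from input is not part of the schema, so it is omitted here):

def user_to_dict(user, ctx):
    # Assumed helper for AvroSerializer: return a dict matching the record schema.
    return dict(name=user.name,
                favorite_number=user.favorite_number,
                favorite_color=user.favorite_color)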
Example #17
def avro_messages_producer(schema):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema, schema_registry_client)
    string_serializer = StringSerializer('utf-8')

    producer_conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.serializer': string_serializer,
        'value.serializer': avro_serializer
    }

    return SerializingProducer(producer_conf)
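
A usage sketch for avro_messages_producer(); the schema string, topic and payload are placeholders:

example_schema_str = """
{"type": "record", "name": "Example",
 "fields": [{"name": "field", "type": "string"}]}
"""
producer = avro_messages_producer(example_schema_str)
producer.produce(topic='example-topic',
                 key='example-key',
                 value={'field': 'value'})  # value must match the Avro schema
producer.flush()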
Example #18
def send_record(args):
    """ Sends Record using a SerializingProducer & AvroSerializer """
    topic = args.topic.rstrip()

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_serializer = AvroSerializer(schema_registry_client, DATA_SCHEMA,
                                     data_to_dict)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "key.serializer": StringSerializer('utf_8'),
        "value.serializer": avro_serializer
    }
    producer = SerializingProducer(producer_config)

    split_incoming_data = args.record_value.split(',')
    if len(split_incoming_data) != 7:  # Data Format Check
        print('** Error: Expected 7 comma-separated values, got: ', split_incoming_data)
        raise Exception
    try:  # Data Format Check
        incoming_data = {
            'envId': int(split_incoming_data[0]),
            'whenCollected': str(split_incoming_data[1]),
            'timeLightOnMins': int(split_incoming_data[2]),
            'humidity': int(split_incoming_data[3]),
            'soilMoisture': int(split_incoming_data[4]),
            'temperature': int(split_incoming_data[5]),
            'waterConsumption': int(split_incoming_data[6])
        }
    except Exception as error:
        print('** Error Creating Dict of Data: ', error)
        raise  # incoming_data would be undefined below, so re-raise

    print(f'Producing data records to topic {topic}. ^C to exit.')
    producer.poll(1)
    try:
        key = args.record_key if args.record_key else str(uuid4())
        data_object = Data(incoming_data)
        print('\t-Producing Avro record. . .')
        producer.produce(topic=topic,
                         key=key,
                         value=data_object,
                         on_delivery=delivery_report)
    except ValueError:
        print('\t-Invalid input, discarding record. . .')
    print('\nFlushing records. . .')
    producer.flush()
    def __init__(self, value_schema, groupID='KafkaAvroProducer'):

        # Schema Registry configuration
        self.schema_registry_conf = self.getSchemaRegistryConf()
        # Schema Registry Client
        self.schema_registry_client = SchemaRegistryClient(
            self.schema_registry_conf)

        # String Serializer for the key
        self.key_serializer = StringSerializer('utf_8')
        # Avro Serializer for the value
        self.value_serializer = AvroSerializer(value_schema,
                                               self.schema_registry_client)

        # Get the producer configuration
        self.producer_conf = self.getProducerConfiguration(groupID)
        # Create the producer
        self.producer = SerializingProducer(self.producer_conf)
Example #20
def test_consume_error(kafka_cluster):
    """
    Tests to ensure librdkafka errors are propagated as
    an instance of ConsumeError.
    """
    topic = kafka_cluster.create_topic("test_commit_transaction")
    consumer_conf = {'enable.partition.eof': True}

    producer = kafka_cluster.producer()
    producer.produce(topic=topic, value="a")
    producer.flush()

    consumer = kafka_cluster.consumer(consumer_conf,
                                      value_deserializer=StringSerializer())
    consumer.assign([TopicPartition(topic, 0, OFFSET_END)])

    with pytest.raises(ConsumeError, match="No more messages"):
        # Trigger EOF error
        consumer.poll()
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    protobuf_serializer = ProtobufSerializer(user_pb2.User,
                                             schema_registry_client,
                                             {'use.deprecated.format': True})

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': protobuf_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = user_pb2.User(name=user_name,
                                 favorite_color=user_favorite_color,
                                 favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             partition=0,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except (KeyboardInterrupt, EOFError):
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
    def __init__(self, config_env):
        self.config = config_env
        self.topic_name = self.config["kafka_produce_topic"]

        conf = {
            'bootstrap.servers': self.config["bootstrap_servers"],
            'message.max.bytes': self.config["kafkaMaxMessageBytes"],
            'queue.buffering.max.ms': self.config["queue.buffering.max.ms"],
            'queue.buffering.max.messages': self.config["queue.buffering.max.messages"],
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': self.__protobuf_serializer()
        }

        self.producer = SerializingProducer(conf)
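
The __protobuf_serializer() helper is not shown; a plausible sketch, assuming a generated protobuf class and a schema registry URL held in the same config dict (the example_pb2 module and the config key names are assumptions):

    def __protobuf_serializer(self):
        schema_registry_conf = {'url': self.config['schema_registry_url']}
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)
        return ProtobufSerializer(example_pb2.ExampleMessage,
                                  schema_registry_client,
                                  {'use.deprecated.format': False})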
Example #23
def test_consume_error(kafka_cluster):
    """
    Tests to ensure librdkafka errors are propagated as
    an instance of ConsumeError.
    """
    topic = kafka_cluster.create_topic("test_commit_transaction")
    consumer_conf = {'group.id': 'pytest', 'enable.partition.eof': True}

    producer = kafka_cluster.producer()
    producer.produce(topic=topic, value="a")
    producer.flush()

    consumer = kafka_cluster.consumer(consumer_conf,
                                      value_deserializer=StringSerializer())
    consumer.assign([TopicPartition(topic, 0, OFFSET_END)])

    with pytest.raises(ConsumeError) as exc_info:
        # Trigger EOF error
        consumer.poll()
    assert exc_info.value.args[0].code() == KafkaError._PARTITION_EOF, \
        "Expected _PARTITION_EOF, not {}".format(exc_info)
    def __init__(self,
                 bootstrap_servers,
                 rss_feeds,
                 topic='crawl-queue',
                 time_checkpoint_fn_base='scheduler_checkpoint'):
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic

        self.feeds = rss_feeds

        self.time_checkpoints = dict()
        for spider_name in self.feeds.values():
            fn = f'{time_checkpoint_fn_base}_{spider_name}.txt'
            fn = fn.replace('/', '_')  # we don't want / in our pathnames.
            self.time_checkpoints[spider_name] = TimeCheckpoint(fn=fn)

        producer_conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': _json_serializer_wrapper
        }
        self.producer = SerializingProducer(producer_conf)
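
The _json_serializer_wrapper used as the value serializer above is not shown; a minimal sketch that turns any JSON-serializable object into UTF-8 bytes:

import json

def _json_serializer_wrapper(obj, ctx):
    # (obj, SerializationContext) -> bytes, as expected by SerializingProducer.
    return json.dumps(obj).encode('utf-8')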
Example #25
    def produce(self, count: int):
        def increment(err, msg):
            assert err is None
            assert msg is not None
            assert msg.offset() == self.acked
            self.logger.debug("Acked offset %d", msg.offset())
            self.acked += 1

        producer = SerializingProducer({
            'bootstrap.servers': self.brokers,
            'key.serializer': StringSerializer('utf_8'),
            'value.serializer': self._make_serializer()
        })

        self.logger.info("Producing %d %s records to topic %s", count,
                         self.schema_type.name, self.topic)
        for i in range(count):
            # Prevent overflow of buffer
            while len(producer) > 50000:
                # Serve on_delivery callbacks from previous calls to produce()
                producer.poll(0.1)

            producer.produce(topic=self.topic,
                             key=str(uuid4()),
                             value=self._make_payload(i),
                             on_delivery=increment)
            self.produced += 1

        self.logger.info("Flushing records...")
        producer.flush()
        self.logger.info("Records flushed: %d", self.produced)
        while self.acked < count:
            producer.poll(0.01)
        self.logger.info("Records acked: %d", self.acked)
Example #26
def main(args):
    topic = args.topic

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(User.avro_schema(),
                                     schema_registry_client,
                                     user_to_dict)

    producer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.serializer': StringSerializer('utf_8'),
                     'value.serializer': avro_serializer}

    producer = SerializingProducer(producer_conf)

    print(f"Producing user records to topic {topic}. ^C to exit.")
    while True:
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic, key=str(uuid4()), value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #27
def main(args):
    topic = args.topic
    schema_str = EventSchema

    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_serializer = AvroSerializer(schema_str, schema_registry_client)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    list_type = [{
        "grilleIdent": "Numero 123T",
        "codeRetourServiceMetier": "code 23432543",
        "referer": "1qsd",
        "browserVersion": "qsdqsd",
        "androidUDID": "qsdqsdqsd",
        "iosIDFA": "qdqsdqsd",
        "appVersion": "qsdqsdqsdqsd",
        "idTmx": "qsdqsdqsd"
    }, {
        "numeroCompteBeneficiaire": "Numero 123T",
        "codePaysResidence": "code 23432543",
        "codePaysResidenceIso": "code 23432543",
        "adresseBeneficiaire": "code 23432543",
        "nomCompletBeneficiaire": "code 23432543",
        "idListeBeneficiaire": "code 23432543",
        "idBeneficiaire": "code 23432543",
        "modeValidation": 34,
        "bicBeneficiaire": "code 23432543",
        "idTmx": "code 23432543"
    }]
    while True:
        x = random.choice([0, 1])

        eventHeader = {
            "eventId": str(uuid4()),
            "dateTimeRef": 1589364605654,
            "nomenclatureEv": "Event Header",
            "canal": 1,
            "media": 2,
            "schemaVersion": "v0",
            "headerVersion": "v2",
            "serveur": "s1",
            "acteurDeclencheur": {
                "adresseIP": "127.0.0.1",
                "idTelematique": str(uuid4()),
                "idPersonne": "zahir"
            }
        }
        value = {
            "EventHeader": eventHeader,
            "EventBusinessContext": list_type[x]
        }
        print(value)
        producer.produce(topic=topic,
                         key=str(uuid4()),
                         value=value,
                         on_delivery=delivery_report)
        producer.flush()
        time.sleep(0.1)
# Create it by running
# protoc -I=. --python_out=. ./meal.proto

import meal_pb2
from uuid import uuid4

from confluent_kafka import SerializingProducer
from confluent_kafka.serialization import StringSerializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.protobuf import ProtobufSerializer

topic = 'meal'
schema_registry_client = SchemaRegistryClient({'url': 'http://t620.lan:8081'})
protobuf_serializer = ProtobufSerializer(meal_pb2.Meal, schema_registry_client)

producer_conf = {
    'bootstrap.servers': 't620.lan:9092',
    'key.serializer': StringSerializer('utf_8'),
    'value.serializer': protobuf_serializer
}

producer = SerializingProducer(producer_conf)

producer.poll(0.0)

mybeer = meal_pb2.Meal.DrinkItems(drink_name="beer")
mywine = meal_pb2.Meal.DrinkItems(drink_name="wine")

meal = meal_pb2.Meal(name='pizza', drink=[mybeer, mywine])
# A lighter meal (for testing kafkajs, which seems to miss the drinks):
# meal = meal_pb2.Meal(name='pizza', drink=[])

producer.produce(topic=topic, key=str(uuid4()), value=meal)
producer.flush()  # make sure the message is delivered before the script exits
def main(args):
    topic = args.topic

    schema_str = """
    {
      "$schema": "http://json-schema.org/draft-07/schema#",
      "title": "User",
      "description": "A Confluent Kafka Python User",
      "type": "object",
      "properties": {
        "name": {
          "description": "User's name",
          "type": "string"
        },
        "favorite_number": {
          "description": "User's favorite number",
          "type": "number",
          "exclusiveMinimum": 0
        },
        "favorite_color": {
          "description": "User's favorite color",
          "type": "string"
        }
      },
      "required": [ "name", "favorite_number", "favorite_color" ]
    }
    """
    schema_registry_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    json_serializer = JSONSerializer(schema_registry_client, schema_str,
                                     user_to_dict)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': json_serializer
    }

    producer = SerializingProducer(producer_conf)

    print("Producing user records to topic {}. ^C to exit.".format(topic))
    while True:
        # Serve on_delivery callbacks from previous calls to produce()
        producer.poll(0.0)
        try:
            user_name = input("Enter name: ")
            user_address = input("Enter address: ")
            user_favorite_number = int(input("Enter favorite number: "))
            user_favorite_color = input("Enter favorite color: ")
            user = User(name=user_name,
                        address=user_address,
                        favorite_color=user_favorite_color,
                        favorite_number=user_favorite_number)
            producer.produce(topic=topic,
                             key=str(uuid4()),
                             value=user,
                             on_delivery=delivery_report)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #30
import socket
from datetime import datetime

import toml
from confluent_kafka.serialization import StringSerializer


def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: %s: %s" % (str(msg), str(err)))
    else:
        print(
            'Message {} successfully produced to {} [{}] at offset {}'.format(
                msg.key(), msg.topic(), msg.partition(), msg.offset()))

# Load HopsWorks Kafka configuration
conf = toml.load('config.toml')
# Initialize a simple String serializer for the key
string_serializer = StringSerializer('utf_8')

producer_conf = {
    'bootstrap.servers': conf['hops']['url'] + ':' + conf['kafka']['port'],
    'security.protocol': 'SSL',
    'ssl.ca.location': conf['project']['ca_file'],
    'ssl.certificate.location': conf['project']['certificate_file'],
    'ssl.key.location': conf['project']['key_file'],
    'ssl.key.password': conf['project']['key_password'],
    'key.serializer': string_serializer,
    'value.serializer': string_serializer,
    'client.id': socket.gethostname()
}

print(producer_conf)
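
The excerpt stops before a producer is created; a short continuation sketch using the configuration above (topic name and payload are placeholders):

from confluent_kafka import SerializingProducer

producer = SerializingProducer(producer_conf)
producer.produce(topic='example-topic',
                 key='key-1',
                 value='hello from HopsWorks',
                 on_delivery=acked)
producer.flush()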