def test_produce_primitive_string_key(self):
     value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
     key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
     producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
     with self.assertRaises(ConnectionError):  # Nonexistent schema registry
         producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey',
                          key_schema=key_schema)
 def test_produce_with_empty_key_no_schema(self):
     value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
     schema_registry = MockSchemaRegistryClient()
     producer = AvroProducer({}, schema_registry=schema_registry,
                             default_value_schema=value_schema)
     with self.assertRaises(KeySerializerError):
         producer.produce(topic='test', value=0.0, key='')
 def test_produce_with_custom_registry(self):
     schema_registry = MockSchemaRegistryClient()
     value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
     key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
     producer = AvroProducer({}, schema_registry=schema_registry)
     producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey',
                      key_schema=key_schema)
 def test_produce_with_empty_key_value_with_schema(self):
     key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
     value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
     schema_registry = MockSchemaRegistryClient()
     producer = AvroProducer({}, schema_registry=schema_registry,
                             default_key_schema=key_schema,
                             default_value_schema=value_schema)
     producer.produce(topic='test', value=0.0, key='')
Example #6
    def __init__(self, kafkaAddress, schemaRegistryAddress,
                 kafkaConnectAddress, credentialPath, testVersion):
        self.testVersion = testVersion
        self.credentialPath = credentialPath
        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            testWarehouse = credentialJson["warehouse"]
            pk = credentialJson["encrypted_private_key"]
            pk_passphrase = credentialJson["private_key_passphrase"]

        self.TEST_DATA_FOLDER = "./test_data/"
        self.httpHeader = {
            'Content-type': 'application/json',
            'Accept': 'application/json'
        }

        self.SEND_INTERVAL = 0.01  # send a record every 10 ms
        self.VERIFY_INTERVAL = 60  # verify every 60 secs
        self.MAX_RETRY = 120  # max wait time 120 mins
        self.MAX_FLUSH_BUFFER_SIZE = 5000  # flush the buffer once 5000 records are queued

        self.kafkaConnectAddress = kafkaConnectAddress
        self.schemaRegistryAddress = schemaRegistryAddress
        self.kafkaAddress = kafkaAddress

        self.adminClient = AdminClient({"bootstrap.servers": kafkaAddress})
        self.producer = Producer({'bootstrap.servers': kafkaAddress})
        self.avroProducer = AvroProducer({
            'bootstrap.servers': kafkaAddress,
            'schema.registry.url': schemaRegistryAddress
        })

        reg = r"[^/]*snowflakecomputing"  # find the account name
        account = re.findall(reg, testHost)
        if len(account) != 1 or len(account[0]) < 20:
            print(
                datetime.now().strftime("%H:%M:%S "),
                "Format error in 'host' field of profile.json, expecting account.snowflakecomputing.com:443"
            )

        pkb = parsePrivateKey(pk, pk_passphrase)
        self.snowflake_conn = snowflake.connector.connect(
            user=testUser,
            private_key=pkb,
            account=account[0][:-19],
            warehouse=testWarehouse,
            database=testDatabase,
            schema=testSchema)
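# A worked illustration of the account-name regex above, using a
# hypothetical host value (an assumption; real values come from the
# credentials file):
import re

sample_host = "myaccount.snowflakecomputing.com:443"
account = re.findall(r"[^/]*snowflakecomputing", sample_host)
# ["myaccount.snowflakecomputing"]; dropping the trailing
# ".snowflakecomputing" (19 characters) leaves the bare account name.
assert account[0][:-19] == "myaccount"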
Example #7
    def __init__(self, args):
        self.args = args

        self.avro_producer = AvroProducer(
            {
                'bootstrap.servers': self.args.brokers,
                'schema.registry.url': self.args.registry
            },
            default_key_schema=avro.loads(self.args.keyschema),  # key schema
            default_value_schema=avro.loads(self.args.schema)  # value schema
        )
Example #8
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": BOOTSTRAP_SERVER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL,
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        admin_client = AdminClient({'bootstrap.servers': BOOTSTRAP_SERVER_URL})
        admin_client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas)
        ])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer:
            self.producer.flush(timeout=10.0)

    @staticmethod
    def time_millis():
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
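# A minimal usage sketch for the Producer class above. The schema file
# names, topic, and record contents are illustrative assumptions;
# BOOTSTRAP_SERVER_URL and SCHEMA_REGISTRY_URL must already be defined.
key_schema = avro.load("key_schema.avsc")
value_schema = avro.load("value_schema.avsc")
producer = Producer("example.topic", key_schema, value_schema=value_schema)
producer.producer.produce(topic=producer.topic_name,
                          key={"timestamp": Producer.time_millis()},
                          value={"example_field": "example value"})
producer.close()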
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=6,
        num_replicas=2,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "schema.registry.url": "http://localhost:8081"
        }
        self.client = AdminClient({"bootstrap.servers": "localhost:9092"})
        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            config=self.broker_properties,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema
        )
Example #10
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            'bootstrap.servers': 'PLAINTEXT://localhost:9092',
            'schema.registry.url': 'http://localhost:8081'
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(
            {'bootstrap.servers': self.broker_properties['bootstrap.servers']})
        new_topic = NewTopic(self.topic_name,
                             num_partitions=self.num_partitions,
                             replication_factor=self.num_replicas)
        client.create_topics([new_topic])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
            self,
            topic_name,
            key_schema,
            value_schema=None,
            num_partitions=1,
            num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:29092",
            "schema.registry.url": "http://localhost:8091"
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=key_schema,
            default_value_schema=value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient({"bootstrap.servers": self.broker_properties["bootstrap.servers"]})
        client.create_topics([NewTopic(self.topic_name,
                                       num_partitions=self.num_partitions,
                                       replication_factor=self.num_replicas)])

        logger.info("topic created: %s", self.topic_name)

    def close(self):
        self.producer.flush()
        logger.info("producer flushed on close")

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        self._client = None

        # Broker properties. Note: "cleanup.policy", "delete.retention.ms"
        # and "file.delete.delay.ms" are topic-level settings rather than
        # producer configuration, so they are omitted here.
        self.broker_properties = {
            "schema.registry.url": "http://localhost:8081",
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "compression.type": "lz4",
            "linger.ms": 1000,
            "batch.num.messages": 100,
            "on_delivery": delivery_report
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer with the broker properties above
        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=key_schema,
            default_value_schema=value_schema)

class KafkaProducer:
    def __init__(self,
                 kafka_env='LOCAL',
                 kafka_brokers="",
                 kafka_apikey="",
                 schema_registry_url=""):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_apikey = kafka_apikey
        self.schema_registry_url = schema_registry_url

    def prepareProducer(self,
                        groupID="pythonproducers",
                        key_schema="",
                        value_schema=""):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'schema.registry.url': self.schema_registry_url,
            'group.id': groupID
        }
        # Local Kafka does not use SSL, so only set the security-related
        # properties for non-local environments.
        if self.kafka_env != 'LOCAL':
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = '******'
            options['sasl.password'] = self.kafka_apikey
        if self.kafka_env == 'ICP':
            options['ssl.ca.location'] = os.environ['PEM_CERT']
            options['schema.registry.ssl.ca.location'] = os.environ['PEM_CERT']
        print("--- This is the configuration for the producer: ---")
        print(options)
        print("---------------------------------------------------")
        self.producer = AvroProducer(options,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            print('[ERROR] - Message delivery failed: {}'.format(err))
        else:
            print('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def publishEvent(self, topicName, value, key):
        # Important: value arrives as a JSON string (not a parsed object)
        # from ContainerAvroProducer.py, so parse it once before producing.
        parsed_value = json.loads(value)
        self.producer.produce(topic=topicName,
                              value=parsed_value,
                              key=parsed_value[key],
                              callback=self.delivery_report)
        self.producer.flush()
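# A minimal usage sketch for KafkaProducer above. The broker address and
# the key/value schema strings are illustrative assumptions.
kafka_producer = KafkaProducer(kafka_env='LOCAL',
                               kafka_brokers='localhost:9092',
                               schema_registry_url='http://localhost:8081')
kafka_producer.prepareProducer(key_schema=avro.loads(key_schema_str),
                               value_schema=avro.loads(value_schema_str))
# publishEvent expects value as a JSON string and key as the name of a
# field inside that JSON document:
kafka_producer.publishEvent('orders', '{"orderID": "o-1", "status": "new"}', 'orderID')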
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Broker Properties

        # Documentation for the Producer API is available at
        # https://docs.confluent.io/current/clients/python.html
        # The Avro producer is just a wrapper on top of the standard
        # Confluent Python producer. The supported configuration values are
        # dictated by the underlying librdkafka, written in C. Documentation
        # of all available properties is available at
        # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        self.broker_properties = {

            # Default Kafka configurations
            'bootstrap.servers': 'PLAINTEXT://localhost:9092',
            'client.id': 'simulation_producer',

            # Avro schema
            'schema.registry.url': 'http://localhost:8081',

            # Batching configurations
            'linger.ms': 0.5,  # Number of ms to wait to accumulate messages to send
            'batch.num.messages': 100,  # Number of messages to accumulate before sending
            'queue.buffering.max.kbytes': 2097151  # ~2 GB, the maximum allowed value
        }

        # Configure admin client
        self.admin_client = AdminClient(
            {'bootstrap.servers': self.broker_properties['bootstrap.servers']}
        )

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer with the schemas collected above
        self.producer = AvroProducer(
            config=self.broker_properties,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema
        )
class Producer:
    """Defines and provides common functionality amongst Producers"""
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {"bootstrap.servers": KAFKA_BROKER_URL}

        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        schema_registry = CachedSchemaRegistryClient("http://localhost:8081")

        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema,
                                     schema_registry=schema_registry)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(self.broker_properties)

        topic = NewTopic(self.topic_name,
                         num_partitions=self.num_partitions,
                         replication_factor=self.num_replicas)

        client.create_topics([topic])

    def close(self):
        Producer.existing_topics = set([])
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
 def __init__(self):
     config = self.load_config(self.CONFIG_FILE)
     sc = CachedSchemaRegistryClient(url=config['kafkaSchemaRegistryUrl'])
     self.topic = config['kafkaTopics'][0]
     key_schema = sc.get_latest_schema(self.topic + "-key")[1]
     val_schema = sc.get_latest_schema(self.topic + "-value")[1]
     self.producer = AvroProducer(
         {
             'bootstrap.servers': config['kafkaBootstrapServers'],
             'schema.registry.url': config['kafkaSchemaRegistryUrl']
         },
         default_key_schema=key_schema,
         default_value_schema=val_schema)
Example #17
 def __init__(self, name, emit_datum, broker, schema_registry_url):
     self.name = name
     self.emit_datum = emit_datum
     schema = avro.loads(get_schema_def())
     self.producer = AvroProducer(
         {
             'bootstrap.servers': broker,
             'schema.registry.url': schema_registry_url,
             **get_sr_config_from_environment(),
             **get_kafka_config_from_environment(),
         },
         default_key_schema=schema,
         default_value_schema=schema)
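# get_sr_config_from_environment and get_kafka_config_from_environment are
# project-specific helpers not shown in this example. A plausible sketch of
# the latter (an assumption, not the project's actual implementation) maps
# prefixed environment variables onto librdkafka-style dotted keys:
import os

def get_kafka_config_from_environment():
    # e.g. KAFKA_SECURITY_PROTOCOL=SASL_SSL -> {'security.protocol': 'SASL_SSL'}
    prefix = "KAFKA_"
    return {key[len(prefix):].lower().replace("_", "."): value
            for key, value in os.environ.items() if key.startswith(prefix)}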
Example #18
 def __init__(self):
     url = "http://localhost:8088"
     self.api_client = KSQLAPI(url)
     self.topic = "test08"
     self.bootstrap_servers = "localhost:9092"
     if utils.check_kafka_available(self.bootstrap_servers):
         value_schema_str = """ 
         { 
             "type": "record", 
             "namespace": "com.example", 
             "name": "value", 
             "fields": [ 
                 {"name":"LOCATION", "type":"string"}, 
                 {"name":"DATETIME", "type":"string"}, 
                 {"name":"SENTIMENT", "type":"string"}, 
                 {"name":"TEXT", "type":"string"} 
             ] 
         } 
         """
         key_schema_str = """ 
         { 
             "type": "record", 
             "namespace": "com.example", 
             "name": "key", 
             "fields": [ 
                 {"name":"LOCATION", "type":"string"}, 
                 {"name":"DATETIME", "type":"string"}, 
                 {"name":"SENTIMENT", "type":"string"}, 
                 {"name":"TEXT", "type":"string"} 
             ] 
         } 
         """
         value_schema = avro.loads(value_schema_str)
         key_schema = avro.loads(key_schema_str)
         self.key = {
             "LOCATION": "LOCATION",
             "DATETIME": "DATETIME",
             "SENTIMENT": "SENTIMENT",
             "TEXT": "TEXT"
         }
         self.producer = AvroProducer(
             {
                 'bootstrap.servers': self.bootstrap_servers,
                 'on_delivery': delivery_report,
                 'schema.registry.url': 'http://localhost:8081'
             },
             default_key_schema=key_schema,
             default_value_schema=value_schema)
     else:
         print("Could not connect to Kafka")
         exit(-1)
Example #19
class PeriodicProducer(object):
    def __init__(self, bootstrap_servers, schema_registry_url, topic):
        value_schema = avro.load('resources/workshop.avsc')
        config = {
            'bootstrap.servers': bootstrap_servers,
            'schema.registry.url': 'http://{0}'.format(schema_registry_url)
        }
        self.topic = topic
        self.stopped = True
        self.end_time = 0
        self.producer = AvroProducer(config, default_value_schema=value_schema)

    def __get_props(self):
        if random() > 0.5:
            p = random_movie()
            return {'title': p[0], 'properties': {'release_year': p[1]}}
        else:
            p = random_series()
            return {'title': p[0], 'properties': {'seasons': p[1]}}

    def __loop__(self):
        now = int(time())
        props = self.__get_props()
        document = {
            'timestamp': now,
            'user': random_user(),
            'title': props['title'],
            'tags': random_tags(),
            'comment': random_sentence(),
            'rating': randint(0, 9),
            'properties': props['properties']
        }
        print('Sending {0} to kafka.'.format(document))
        self.producer.produce(topic=self.topic, value=document)

        if not self.stopped and now < self.end_time:
            Timer(1, self.__loop__).start()

    def run(self, period):
        self.stopped = False
        self.end_time = int(time()) + period
        self.__loop__()

    def is_stopped(self):
        return self.stopped

    def is_running(self):
        return not self.stopped

    def stop(self):
        self.stopped = True
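# A usage sketch for PeriodicProducer (addresses and topic are illustrative
# assumptions). Note the class prepends "http://" to the registry address,
# so pass it without a scheme; this produces random ratings for 60 seconds.
producer = PeriodicProducer('localhost:9092', 'localhost:8081', 'ratings')
producer.run(60)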
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        BROKER_URL = "PLAINTEXT://localhost:9092"
        SCHEMA_REGISTRY = "http://localhost:8081"

        self.broker_properties = {"bootstrap.servers": BROKER_URL}

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            self.broker_properties,
            schema_registry=CachedSchemaRegistryClient(SCHEMA_REGISTRY),
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        AdminClient(self.broker_properties).create_topics([
            NewTopic(self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas)])

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
 def __init__(self,
              value_schema_loc: str = None,
              key_name: str = 'ex-key',
              topic: str = 'test'):
     if not value_schema_loc:
         raise NoAvroSchemaFileException()
     self.value_schema = avro.load(value_schema_loc)
     self.avro_producer = AvroProducer(
         {
             'bootstrap.servers': 'localhost:9092',
             'schema.registry.url': 'http://127.0.0.1:8081'
         },
         default_value_schema=self.value_schema)
     self.topic = topic
    def __init__(self, schema_name, topic):
        kafka_cfg = parse_kafka_config()
        key_schema, value_schema = load_avro_schema_from_registry(
            schema_name, kafka_cfg['schema-registry-url'])

        producer_config = {
            "bootstrap.servers": kafka_cfg['bootstrap-servers'],
            "schema.registry.url": kafka_cfg['schema-registry-url']
        }

        self.topic = topic
        self.producer = AvroProducer(producer_config,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)
Example #23
def produce(conf, data_file, schema_record):
    """
        Produce MetadataChangeEvent records
    """
    producer = AvroProducer(conf,
                            default_value_schema=avro.load(schema_record))

    print("Producing MetadataChangeEvent records to topic {}. ^c to exit.".
          format(topic))

    with open(data_file) as fp:
        cnt = 0
        while True:
            sample = fp.readline()
            cnt += 1
            if not sample:
                break
            try:
                content = ast.literal_eval(sample.strip())
                producer.produce(topic=topic, value=content)
                producer.poll(0)
                print("  MCE{}: {}".format(cnt, sample))
            except KeyboardInterrupt:
                break
            except ValueError as e:
                print("Message serialization failed {}".format(e))
                break

    print("Flushing records...")
    producer.flush()
class Demonstrator:

    def __init__(self, broker_urls, registry_url, topic):
        self.broker_urls = broker_urls
        self.registry_url = registry_url

        if topic == "login":
            self.value_schema = schema_login
            self.key_schema = schema_login
        if topic == "message":
            self.value_schema = schema_message
            self.key_schema = schema_message
        if topic == "mouse":
            self.value_schema = schema_mouse
            self.key_schema = schema_mouse

        self.avroProducer = AvroProducer({
            'bootstrap.servers': self.broker_urls,
            'on_delivery': delivery_report,
            'schema.registry.url': self.registry_url,
        }, default_key_schema=self.key_schema, default_value_schema=self.value_schema)

    def checkLocation(self, loc, port):
        a_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        location = (loc, port)
        result_of_check = a_socket.connect_ex(location)
        if result_of_check == 0:
            print("Port is open")
        else:
            print("Port is not open")
        a_socket.close()

    def produceMessage(self, topic, value, key):
        self.avroProducer.produce(topic=topic, value=value, key=key)
        self.avroProducer.flush()


    def createTopic(self, *topics, partitions, replication):
        a = AdminClient({'bootstrap.servers': self.broker_urls})

        new_topics = [NewTopic(topic, num_partitions=int(partitions), replication_factor=int(replication)) for topic in topics]

        fs = a.create_topics(new_topics)

        for topic, f in fs.items():
            try:
                f.result()
                print("Topic {} created".format(topic))
            except Exception as e:
                print("Failed to create topic {}: {}".format(topic, e))
Example #25
    def send_to_kafka():
        Timer(10.0, send_to_kafka).start()
        try:
            print("running")
            avro_producer = AvroProducer(
                {
                    'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                    'schema.registry.url': 'http://up04:8081'
                },
                default_key_schema=key_schema,
                default_value_schema=value_schema)

            value = read_from_sense_hat()

            print(value)

            avro_producer.poll(0)

            avro_producer.produce(topic='test_avro_2',
                                  value=value,
                                  key=key,
                                  callback=delivery_report)
            avro_producer.flush()

        except Exception as e:
            logging.error(traceback.format_exc())
    def __init__(self,
                 path_to_schema,
                 broker_url=os.environ.get('KAFKA_BROKER_URL'),
                 topic=os.environ.get('PRODUCER_TOPIC'),
                 schema_registry_url='http://schema_registry:8081'):
        self.value_schema = avro.load(path_to_schema)

        self.p = AvroProducer(
            {
                'bootstrap.servers': broker_url,
                'schema.registry.url': schema_registry_url
            },
            default_value_schema=self.value_schema)
        self.topic = topic
Example #27
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    # See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/cached_schema_registry_client.py#L47
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
    p = AvroProducer({"bootstrap.servers": BROKER_URL},
                     schema_registry=schema_registry)
    while True:
        # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
        p.produce(topic=topic_name,
                  value=asdict(ClickEvent()),
                  value_schema=ClickEvent.schema)
        await asyncio.sleep(1.0)
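# The coroutine above needs an event loop to drive it; a minimal sketch
# (the topic name is an illustrative assumption):
import asyncio

if __name__ == "__main__":
    asyncio.run(produce("clickevents"))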
Example #28
def produce(topic, conf):
    """
        Produce User records
    """

    from confluent_kafka.avro import AvroProducer

    producer = AvroProducer(conf, default_value_schema=record_schema)

    print("Producing user records to topic {}. ^c to exit.".format(topic))
    while True:
        # Instantiate new User, populate fields, produce record, execute callbacks.
        record = User()
        try:
            record.name = input("Enter name: ")
            record.favorite_number = int(input("Enter favorite number: "))
            record.favorite_color = input("Enter favorite color: ")

            # The message passed to the delivery callback will already be serialized.
            # To aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic,
                             value=record.to_dict(),
                             callback=lambda err, msg, obj=record: on_delivery(
                                 err, msg, obj))
            # Serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
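# on_delivery is referenced above but not defined in this snippet. A minimal
# sketch of such a callback (an assumption, not the original implementation):
def on_delivery(err, msg, obj):
    if err is not None:
        print("Delivery failed for User {}: {}".format(obj.name, err))
    else:
        print("User {} delivered to {} [{}] at offset {}".format(
            obj.name, msg.topic(), msg.partition(), msg.offset()))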
Example #29
class vroducer():
    def __init__(self, avro_schema, BOOTSTRAP_SERVERS, SCHEMA_REGISTRY_PATH):

        self.avroProducer = AvroProducer(
            {
                'bootstrap.servers': BOOTSTRAP_SERVERS,
                'on_delivery': self.delivery_report,
                'schema.registry.url': SCHEMA_REGISTRY_PATH
            },
            default_value_schema=avro_schema)

        self.logger = logging.getLogger("VRODUCER")

    def produce_message(self, topic_name, message):
        self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def produce_message_bulk(self, topic_name, message_list):
        for message in message_list:
            self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            self.logger.error('Message delivery failed: {}'.format(err))
        else:
            self.logger.info('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    p = AvroProducer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "schema.registry.url": "http://localhost:8081",
    })
    while True:
        p.produce(topic=topic_name,
                  value=asdict(Purchase()),
                  value_schema=Purchase.schema)
        await asyncio.sleep(0.1)
Example #31
class KafkaProducer:
    def __init__(
        self,
        key_schema_str,
        value_schema_str,
        raw_key_model,
        raw_value_model,
        topic_name,
        header_index,
    ):
        self.topic_name = topic_name
        self.header_index = header_index
        self.key_schema = avro.loads(key_schema_str)
        self.value_schema = avro.loads(value_schema_str)
        self.avro_producer = AvroProducer(
            {
                "bootstrap.servers": os.environ["BOOTSTRAP_SERVERS"],
                "schema.registry.url": os.environ["SCHEMA_REGISTRY_URL"],
                # Safe producer settings
                # 'enable.idempotence': True,
                # High throughput
                # 'compression.type': 'snappy',
                # 'linger.ms': 20,
                # 'batch.size': 32768
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema,
        )
        self.raw_key_model = raw_key_model
        self.raw_value_model = raw_value_model

    def preprocessing(self, data):
        backed_key_obj = dict()
        backed_value_obj = dict()
        for key_model in self.raw_key_model:
            key_index = self.header_index[key_model]
            backed_key_obj[key_model] = data[key_index]
        for value_model in self.raw_value_model:
            value_index = self.header_index[value_model]
            backed_value_obj[value_model] = data[value_index]
        return backed_key_obj, backed_value_obj

    def produce_event(self, data, pre_process=True):
        if pre_process:
            key, value = self.preprocessing(data)
        else:
            key, value = data
        self.avro_producer.produce(topic=self.topic_name, key=key, value=value)
        self.avro_producer.poll(0.1)
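# A worked sketch of preprocessing() with illustrative inputs (the header
# mapping and models below are assumptions, not from the original):
header_index = {"id": 0, "name": 1, "amount": 2}
raw_key_model = ["id"]
raw_value_model = ["name", "amount"]
row = ["42", "alice", "9.99"]
key = {m: row[header_index[m]] for m in raw_key_model}      # {'id': '42'}
value = {m: row[header_index[m]] for m in raw_value_model}  # {'name': 'alice', 'amount': '9.99'}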
Example #32
async def produce(topic_name):
    p = AvroProducer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "schema.registry.url": "http://localhost:8081",
    })
    while True:
        p.produce(
            topic=topic_name,
            value=asdict(ClickEvent()),
            value_schema=ClickEvent.schema,
        )
        await asyncio.sleep(0.1)
Example #33
    def __init__(self):
        os.chdir(os.path.dirname(__file__))
        pwd = os.getcwd()

        self._producer = AvroProducer(
            {
                'bootstrap.servers': KAFKA_SERVER,
                'schema.registry.url': SCHEMA_REGISTRY_URL,
                'security.protocol': 'ssl',
                'ssl.ca.location': pwd + CAFILE,
                'ssl.certificate.location': pwd + CERTFILE,
                'ssl.key.location': pwd + KEYFILE
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
Example #34
    def __init__(self,
                 topic,
                 value_schema_path,
                 key_schema_path=None,
                 config=None):
        schema = {'default_value_schema': avro.load(value_schema_path)}
        if key_schema_path is not None:
            schema['default_key_schema'] = avro.load(key_schema_path)

        self.producer = AvroProducer(
            {
                **load_producer_config(),
                **(config or {})
            }, **schema)
        self.topic = topic
 def test_produce_with_empty_value_no_schema(self):
     schema_registry = MockSchemaRegistryClient()
     producer = AvroProducer({}, schema_registry=schema_registry)
     with self.assertRaises(ValueSerializerError):
         producer.produce(topic='test', value='', key='not empty')
from lipsum import generate_words
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
import os
import random

SCHEMA_REGISTRY_URL = 'http://172.17.0.5:8081'
BOOTSTRAP_SERVERS = '172.17.0.4'

AVSC_DIR = os.path.dirname(os.path.realpath(__file__))
KEY_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'primitive_string.avsc'))
VALUE_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'basic_schema.avsc'))

TOPIC = 'avrotopic'
KEY = "mykey"

avroProducer = AvroProducer({'bootstrap.servers': BOOTSTRAP_SERVERS,
                             'schema.registry.url': SCHEMA_REGISTRY_URL},
                            default_key_schema=KEY_SCHEMA,
                            default_value_schema=VALUE_SCHEMA)


for i in range(100):
    value = {"name": generate_words(count=1),
             "surname": generate_words(count=2),
             "number": random.randint(0, 100)}

    print(value)

    avroProducer.produce(topic=TOPIC,
                         value=value,
                         key=KEY)

# Flush to make sure buffered records are actually delivered before exit.
avroProducer.flush()

 def test_produce_no_value_schema(self):
     producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
     with self.assertRaises(ValueSerializerError):
         # Producer should not accept a value with no schema
         producer.produce(topic='test', value={"name": 'abc"'})
 def test_produce_no_key_schema(self):
     producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
     with self.assertRaises(KeySerializerError):
         # If the key is provided as a dict an avro schema must also be provided
         producer.produce(topic='test', key={"name": 'abc"'})
 def test_produce_value_and_key_schemas(self):
     value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
     producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, default_value_schema=value_schema,
                             default_key_schema=value_schema)
     with self.assertRaises(ConnectionError):  # Nonexistent schema registry
         producer.produce(topic='test', value={"name": 'abc"'}, key={"name": 'abc"'})