Example #1
def produce(topic_name, patient):
    ''' Produces a patient's biometrics, measured every second,
        on a Kafka topic.

        Args:
            topic_name (str): The name of the topic
            patient (Patient): A Patient object
    '''

    global delivered_status
    parser = ConfigParser()  # SafeConfigParser is deprecated and removed in Python 3.12
    CONFIG_PATH = str(pathlib.Path(
        __file__).parent.parent.absolute()) + '/config/pipeline.cfg'
    parser.read(CONFIG_PATH)
    KAFKA_HOST = parser.get('KAFKA', 'kafka_host')
    KAFKA_PORT = parser.get('KAFKA', 'kafka_port')
    SCHEMA_USER = parser.get('KAFKA', 'kafka_schema_registry_user')
    SCHEMA_PASSWORD = parser.get('KAFKA', 'kafka_schema_registry_password')
    SCHEMA_PORT = parser.get('KAFKA', 'kafka_schema_registry_port')
    AVRO_PATH = str(pathlib.Path(
        __file__).parent.parent.absolute()) + '/config/schemas'
    with open(AVRO_PATH + '/biometrics.avsc') as f:
        VALUE_SCHEMA = f.read().replace('schema_name', topic_name)
    with open(AVRO_PATH + '/key.avsc') as f:
        KEY_SCHEMA = f.read().replace('schema_name', topic_name)
    SSL_PATH = str(pathlib.Path(
        __file__).parent.parent.absolute()) + '/config/ssl'

    patient_biometrics = patient.get_biometrics()
    patient_id = patient.get_id()

    avroProducer = AvroProducer({
        'bootstrap.servers': f'{KAFKA_HOST}:{KAFKA_PORT}',
        "security.protocol": "ssl",
        'ssl.ca.location': f'{SSL_PATH}/ca.pem',
        'ssl.certificate.location': f'{SSL_PATH}/service.cert',
        'ssl.key.location': f'{SSL_PATH}/service.key',
        'schema.registry.url': f'https://{SCHEMA_USER}:{SCHEMA_PASSWORD}@{KAFKA_HOST}:{SCHEMA_PORT}'
    }, default_key_schema=avro.loads(KEY_SCHEMA), default_value_schema=avro.loads(VALUE_SCHEMA))
    avroProducer.produce(topic=topic_name,
                         value=patient_biometrics, key=patient_id, callback=error_callback)
    avroProducer.flush()

    print(f'Published {patient_biometrics} to the topic "{topic_name}"')
    return delivered_status
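
The error_callback and delivered_status names used above are defined elsewhere in the original module. A minimal sketch of what such a delivery-report callback could look like (the reporting logic here is an assumption, not the original code):

delivered_status = False


def error_callback(err, msg):
    # confluent-kafka delivery report: invoked once per produced message,
    # from poll() or flush().
    global delivered_status
    if err is not None:
        print(f'Delivery failed: {err}')
        delivered_status = False
    else:
        print(f'Delivered to "{msg.topic()}" partition {msg.partition()}')
        delivered_status = True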
Example #2
def produce(topic: str, brokers: str, schema_registry_url: str):

    key_schema = loads('"string"')

    value_schema = loads(
        dumps({
            "type": "record",
            "namespace": "example.avro",  # VERY IMPORTANT: must map to the Java object
            "name": "test_record",
            "fields": [
                {"name": "id", "type": "int"},
                {"name": "date", "type": ["int", "null"]},
                {"name": "info", "type": "string"}
            ]
        }))

    avro_producer_settings = {
        'bootstrap.servers': brokers,
        # NOTE: 'group.id' is a consumer property; librdkafka ignores it for producers.
        'group.id': 'groupid',
        'schema.registry.url': schema_registry_url
    }

    producer = AvroProducer(avro_producer_settings)

    i = 0
    try:
        while True:
            sleep(1)
            key = "message_key_" + str(i)
            value = {"id": i, "date": (2 + i**2), "info": "sensor_" + str(i)}
            print("Message Produced: key = {} value = {}".format(key, value))
            producer.produce(topic=topic,
                             key=key,
                             value=value,
                             key_schema=key_schema,
                             value_schema=value_schema)
            i += 1
    finally:
        # flush() was unreachable after the infinite loop; run it on exit instead.
        producer.flush()
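
A hypothetical invocation of the function above against a local broker and Schema Registry (both endpoints are placeholders):

produce(topic='test_topic',
        brokers='localhost:9092',
        schema_registry_url='http://localhost:8081')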
Example #3
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY,
            "linger.ms": 500,
            "batch.num.messages": 10000,
            "compression.type": "lz4"
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer (linger.ms, batch.num.messages and
        # compression.type are already part of broker_properties above)
        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=key_schema,
            default_value_schema=value_schema,
        )
Example #4
def produce_kafka_dataset_mce(mce):
    """
    Produce MetadataChangeEvent records.
    """
    conf = {
        'bootstrap.servers': BOOTSTRAP,
        'schema.registry.url': SCHEMAREGISTRY
    }
    record_schema = avro.load(AVROLOADPATH)
    producer = AvroProducer(conf, default_value_schema=record_schema)

    try:
        producer.produce(topic=KAFKATOPIC, value=mce)
        producer.poll(0)
        sys.stdout.write('\n%s has been successfully produced!\n' % mce)
    except ValueError as e:
        sys.stdout.write('Message serialization failed: %s\n' % e)
    producer.flush()
Example #5
def produce(schema_json, data):

    print('schema:\n')
    pprint.pprint(schema_json)
    print('\n')

    print('message:\n')
    pprint.pprint(data)
    print('\n')

    schema_avro = avro.loads(json.dumps(schema_json))
    producer = AvroProducer({'bootstrap.servers': broker},
                            default_value_schema=schema_avro,
                            schema_registry=schema_registry)

    producer.poll(0)
    producer.produce(topic=topic, value=data)
    producer.flush()
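
A hedged usage sketch for the helper above, assuming the module-level broker, schema_registry, and topic values are configured elsewhere; the schema and payload here are illustrative:

schema_json = {
    "type": "record",
    "name": "reading",
    "fields": [
        {"name": "sensor", "type": "string"},
        {"name": "value", "type": "float"}
    ]
}

produce(schema_json, {"sensor": "sensor_1", "value": 23.5})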
Example #6
    def prepareProducer(self, groupID="pythonproducers", key_schema="", value_schema=""):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'schema.registry.url': self.schema_registry_url,
            'group.id': groupID,
            'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'SCRAM-SHA-512',
            'sasl.username': self.scram_username,
            'sasl.password': self.scram_password,
            'ssl.ca.location': os.environ['PEM_CERT'],
            'schema.registry.ssl.ca.location': os.environ['PEM_CERT']
        }
        # Print out the configuration
        print("--- This is the configuration for the avro producer: ---")
        print(options)
        print("---------------------------------------------------")
        # Create the Avro Producer
        self.producer = AvroProducer(options, default_key_schema=key_schema, default_value_schema=value_schema)
Example #7
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    p = AvroProducer(
        {
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "schema.registry.url": "http://localhost:8081",
        }
    )
    try:
        while True:
            p.produce(
                topic=topic_name,
                value=asdict(ClickEvent()),
                value_schema=ClickEvent.schema,
            )
            await asyncio.sleep(0.1)
    finally:
        # Deliver any queued messages before the task exits
        # (the original bare "except: raise" was a no-op).
        p.flush()
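
ClickEvent comes from elsewhere in the original project. A minimal sketch of a compatible dataclass, with illustrative (assumed) field names, might look like this:

from dataclasses import dataclass, field
import random

from confluent_kafka import avro


@dataclass
class ClickEvent:
    email: str = field(default_factory=lambda: f"user{random.randint(0, 999)}@example.com")
    uri: str = "/index.html"

    # Class attribute (not a dataclass field): the Avro schema for the value.
    schema = avro.loads("""{
        "type": "record",
        "name": "click_event",
        "namespace": "com.example",
        "fields": [
            {"name": "email", "type": "string"},
            {"name": "uri", "type": "string"}
        ]
    }""")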
Example #8
File: common.py  Project: corersky/datahub
def produce_dataset_mce(mce, kafka_config):
    """
    Produces a MetadataChangeEvent to Kafka
    """
    conf = {
        'bootstrap.servers': kafka_config.bootstrap_server,
        'schema.registry.url': kafka_config.schema_registry
    }
    record_schema = avro.load(kafka_config.avsc_path)
    producer = AvroProducer(conf, default_value_schema=record_schema)

    try:
        producer.produce(topic=kafka_config.kafka_topic, value=mce)
        producer.poll(0)
        print('\n%s has been successfully produced!\n' % mce)
    except ValueError as e:
        print('Message serialization failed %s' % e)
    producer.flush()
Example #9
def load(datafile, schema, server, topic):
    value_schema = avro.load(schema)
    # key_schema = avro.load('KeySchema.avsc')

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=value_schema)

    # Text mode with UTF-8: a BOM decodes to '\ufeff' as the first character.
    with open(datafile, 'r', encoding='utf-8', newline='') as csvfile:
        header = csvfile.readline()
        if not header.startswith('\ufeff'):
            print('no header present')
            csvfile.seek(0)

        spamreader = csv.reader(csvfile)
        for row in spamreader:

            data = {
                'STSN': row[0],
                'YWSN': row[1],
                'YWWT': row[2],
                'YWVALUE': int(row[3]),
                'YWTIME': row[4],
                'WDSN': row[5],
                'WDWT': row[6],
                'WDVALUE': int(row[7]),
                'WDTIME': row[8],
                'YLSN': row[9],
                'YLWT': row[10],
                'YLVALUE': int(row[11]),
                'YLTIME': row[12],
                'LLSN': row[13],
                'LLWT': row[14],
                'LLVALUE': int(row[15]),
                'LLDIRECT': int(row[16]),
                'LLTIME': row[17]
            }

            avroProducer.produce(topic=topic, value=data)

        avroProducer.flush()
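
A hypothetical call to the loader above (the file names, broker address, and topic are placeholders, and the module-level schema_registry_url is assumed to be set):

load('readings.csv', 'ValueSchema.avsc', 'localhost:9092', 'sensor-readings')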
Example #10
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "broker_url": "PLAINTEXT://localhost:9092",
            "schema_registry_url": "http://localhost:8081",
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            {
                "bootstrap.servers": self.broker_properties['broker_url'],
                "schema.registry.url": self.broker_properties['schema_registry_url']
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)
Example #11
def produce_message(count):
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)
    key = {"name": "Key2"}

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': 'localhost:9092',
            'schema.registry.url': 'http://localhost:8081'
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    for value in range(count):
        avroProducer.produce(topic='demo',
                             value=message(value),
                             key=key,
                             callback=delivery_callback)
    # Flush once after the loop; flushing per message defeats batching.
    avroProducer.flush()
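
The schema strings, the message() helper, and delivery_callback live elsewhere in the original file. A minimal, self-consistent sketch of what they could look like (field names are assumptions chosen to match the key {"name": "Key2"} used above):

key_schema_str = """{
    "type": "record",
    "name": "key",
    "fields": [{"name": "name", "type": "string"}]
}"""

value_schema_str = """{
    "type": "record",
    "name": "value",
    "fields": [{"name": "count", "type": "int"}]
}"""


def message(value):
    # Build a value record matching value_schema_str.
    return {"count": value}


def delivery_callback(err, msg):
    if err is not None:
        print(f"Delivery failed: {err}")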
Example #12
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            'BROKER_URL': 'PLAINTEXT://localhost:9092',
            'SCHEMA_REGISTRY': 'http://localhost:8081',
            'KAFKA_REST_PROXY': 'http://localhost:8082'
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            {
                "bootstrap.servers": self.broker_properties['BROKER_URL'],
                "schema.registry.url": self.broker_properties['SCHEMA_REGISTRY']
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema
        )
Example #13
File: common.py  Project: yhjyoon/datahub
def produce_dataset_mce(mce, kafka_config):
    """
    Produces a MetadataChangeEvent to Kafka
    """
    conf = {
        'bootstrap.servers': kafka_config.bootstrap_server,
        'on_delivery': delivery_report,
        'schema.registry.url': kafka_config.schema_registry
    }
    key_schema = avro.loads('{"type": "string"}')
    record_schema = avro.load(kafka_config.avsc_path)
    producer = AvroProducer(conf,
                            default_key_schema=key_schema,
                            default_value_schema=record_schema)

    producer.produce(topic=kafka_config.kafka_topic,
                     key=mce['proposedSnapshot'][1]['urn'],
                     value=mce)
    producer.flush()
Example #14
    def __init__(self, **kaws):
        self.config = kaws['config']
        self.logger = kaws['logger']
        self.kafka = self.config["kafka"]
        bootstrap_servers = self.kafka['producer']['bootstrap_servers']
        schema_registery = self.kafka['producer']['schema_registery']
        key_schema = avro.loads(key_schema_str)
        value_schema = avro.loads(value_schema_str)
        self.avroProducer = AvroProducer(
            {
                'bootstrap.servers': bootstrap_servers,
                'schema.registry.url': schema_registery
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
Example #15
    def __init__(self, broker_urls, registry_url, topic):
        self.broker_urls = broker_urls
        self.registry_url = registry_url

        if topic == "login":
            self.value_schema = schema_login
            self.key_schema = schema_login
        if topic == "message":
            self.value_schema = schema_message
            self.key_schema = schema_message
        if topic == "mouse":
            self.value_schema = schema_mouse
            self.key_schema = schema_mouse

        self.avroProducer = AvroProducer({
            'bootstrap.servers': self.broker_urls,
            'on_delivery': delivery_report,
            'schema.registry.url': self.registry_url,
        }, default_key_schema=self.key_schema, default_value_schema=self.value_schema)
Example #16
    def prepareProducer(self, groupID="pythonproducers", key_schema="", value_schema=""):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'schema.registry.url': self.schema_registry_url,
            'group.id': groupID
        }
        # We need this test as local Kafka does not expect the SSL protocol.
        if self.kafka_env != 'LOCAL':
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = '******'
            options['sasl.password'] = self.kafka_apikey
        if self.kafka_env == 'ICP':
            options['ssl.ca.location'] = os.environ['PEM_CERT']
            options['schema.registry.ssl.ca.location'] = os.environ['PEM_CERT']
        print("--- This is the configuration for the producer: ---")
        print(options)
        print("---------------------------------------------------")
        self.producer = AvroProducer(options, default_key_schema=key_schema, default_value_schema=value_schema)
Example #17
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        # # "schema.registry.url" = "http://schema-registry:8081/" ---> Docker entry
        #
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL,
            # NOTE: 'default.topic.config' is deprecated in librdkafka;
            # 'acks' can also be set at the top level.
            "default.topic.config": {
                "acks": "all"
            }
        }

        self.admin_client = AdminClient({"bootstrap.servers": BROKER_URL})

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)
Example #18
def setupTopic_routerHsBridge(server, schema_registry_url):
    global regexp_routerHSBridge
    global avroProducer_routerHSBridge
    global count_routerHSBridge
    count_routerHSBridge = 0

    # Topic routerHSBridge scans for a regex that returns this pattern:
    #<31>Apr  1 06:51:39 KKM-WiFi24K-CCR10 hs-bridge_CCO_243: new host detected 54:9F:13:6F:3C:3A/10.243.52.180 by TCP :50872 -> 203.113.34.26:80
    avro_schema = """
  {"namespace": "weblog.kkr.avro",
  "type": "record",
  "name": "routerHSBridge",
  "fields": [
     {"name": "month", "type": ["null","string"],"default":null}
    ,{"name": "day", "type": ["null","string"],"default":null}
    ,{"name": "time", "type": ["null","string"],"default":null}
    ,{"name": "kkm", "type": ["null","string"],"default":null}
    ,{"name": "hs_bridge"  , "type": ["null","string"],"default":null}
    ,{"name": "message"  , "type": ["null","string"],"default":null}
  ]
  }"""
    schema = avro.loads(avro_schema)
    avroProducer_routerHSBridge = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=schema)

    # Raw strings avoid invalid-escape warnings for \D, \s, \d in newer Python.
    regexp_routerHSBridge = re.compile((r'(<27>|<28>|<30>|<31>)'
                                        r'(?P<month>\D{3})'
                                        r'(?P<whitespace1>\s{1,2})'
                                        r'(?P<day>\d{1,2})'
                                        r'(?P<whitespace2>\s{1,2})'
                                        r'(?P<time>\d{2}:\d{2}:\d{2})'
                                        r'(?P<whitespace3>\s{1,2})'
                                        r'(?P<kkm>\D{3}-.{7}-\D{3}\d{1,2})'
                                        r'(?P<whitespace4>\s{1})'
                                        r'(?P<hs_bridge>[^\s]+:)'
                                        r'(?P<whitespace5>\s{1})'
                                        r'(?P<message>[^\s]+)'
                                        r'.*'), re.IGNORECASE)
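
A hedged sketch of how the globals set up above might be used on an incoming syslog line (the sample line comes from the comment in the source; the topic name is an assumption):

line = ('<31>Apr  1 06:51:39 KKM-WiFi24K-CCR10 hs-bridge_CCO_243: '
        'new host detected 54:9F:13:6F:3C:3A/10.243.52.180 by TCP :50872 -> 203.113.34.26:80')
match = regexp_routerHSBridge.match(line)
if match:
    # Build a record with the nullable string fields from the Avro schema.
    record = {k: match.group(k)
              for k in ('month', 'day', 'time', 'kkm', 'hs_bridge', 'message')}
    avroProducer_routerHSBridge.produce(topic='routerHSBridge', value=record)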
Example #19
    def __init__(self):

        # This is the Avro Schema for messages
        self.value_schema_str = """
        {  "name": "value",
           "type": "record",
           "fields" : [
             {"name" : "network", "type" : "float"},
             {"name" : "disk", "type" : "float"},
             {"name" : "cpu", "type" : "float"},
             {"name" : "timestamp", "type" : "long"}
           ]
        }"""
        self.value_schema = avro.loads(self.value_schema_str)

        self.avroProducer = AvroProducer({
            'bootstrap.servers': 'broker:29092',
            'on_delivery': self.delivery_report,
            'schema.registry.url': 'http://schema-registry:8081'
            }, default_value_schema=self.value_schema)
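
A short usage sketch for the wrapper above; MetricsProducer is a hypothetical name for the enclosing class, and the topic is illustrative:

import time

wrapper = MetricsProducer()
metrics = {'network': 12.5, 'disk': 0.7, 'cpu': 43.1,
           'timestamp': int(time.time() * 1000)}  # "long" field in the schema
wrapper.avroProducer.produce(topic='system-metrics', value=metrics)
wrapper.avroProducer.flush()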
Example #20
def setupTopic_routerElse(server, schema_registry_url):
    global regexp_routerElse
    global avroProducer_routerElse
    global count_routerElse
    count_routerElse = 0

    # Topic routerElse scans for a regex that returns this pattern:
    #<31>Apr  1 07:00:06 CWT-WiFi24K-CCR05 .*
    # for example :
    #<31>Apr  1 07:00:06 CWT-WiFi24K-CCR05 already 15 logins in progress\n'
    avro_schema = """
  {"namespace": "weblog.kkr.avro",
  "type": "record",
  "name": "routerElse",
  "fields": [
     {"name": "month", "type": ["null","string"],"default":null}
    ,{"name": "day", "type": ["null","string"],"default":null}
    ,{"name": "time", "type": ["null","string"],"default":null}
    ,{"name": "kkm", "type": ["null","string"],"default":null}
    ,{"name": "message"  , "type": ["null","string"],"default":null}
  ]
  }"""
    schema = avro.loads(avro_schema)
    avroProducer_routerElse = AvroProducer(
        {
            'bootstrap.servers': server,
            'schema.registry.url': schema_registry_url
        },
        default_value_schema=schema)

    # Raw strings avoid invalid-escape warnings for \D, \s, \d in newer Python.
    regexp_routerElse = re.compile((r'(<30>|<31>)'
                                    r'(?P<month>\D{3})'
                                    r'(?P<whitespace1>\s{1,2})'
                                    r'(?P<day>\d{1,2})'
                                    r'(?P<whitespace2>\s{1,2})'
                                    r'(?P<time>\d{2}:\d{2}:\d{2})'
                                    r'(?P<whitespace3>\s{1,2})'
                                    r'(?P<kkm>\D{3}-.{7}-\D{3}\d{1,2})'
                                    r'(?P<whitespace4>\s{1,3})'
                                    r'(?P<message>[^\s]+)'
                                    r'.*'), re.IGNORECASE)
Example #21
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": BOOTSTRAP_SERVER,
            "client.id": "jorge_debug",
            "schema.registry.url": SCHEMA_REGISTRY_URL,
            "acks": -1
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)
Example #22
def run(dataset):
    value = avro.loads(value_schema)
    key = avro.loads(key_schema)

    producer = AvroProducer({
        'bootstrap.servers': os.environ.get("BROKER", "127.0.0.1:9092"),
        'schema.registry.url': os.environ.get("SCHEMA_REGISTRY", "http://127.0.0.1:8081"),
        'compression.codec': 'snappy',
        'message.max.bytes': 15728640
    }, default_key_schema=key, default_value_schema=value)
    
    with open(dataset, "rb") as file:
        dataset = pickle.load(file)
        x_test =  dataset[1]["dataset"]
        one_element = x_test[0]
        # print(one_element.getbuffer())
        # result = io.BytesIO()
        # np.save(result, one_element)
        # result.seek(0)
        # r = result.read()
        # original_size = len(r)
        # new_size = len(zlib.compress(r))
        # print(sizeof_fmt( original_size ))
        # print(sizeof_fmt( new_size ))
        # print(original_size/new_size)

        # result.seek(0)

        # print(np.load(result).shape)

        # print(one_element.shape)

        for idx, el in enumerate( x_test ):
            print("Uploading: %d" %(idx))
            #result = io.BytesIO()
            # np.save(result, el)
            # result.seek(0)
            # data = zlib.compress( result.read() )
            # data = zlib.compress( result.read() )
            producer.produce(topic="balance-lastSeen", key={'index': idx}, value={"dataframe": el.tolist() })
Example #23
def run(messenger):
    """Produce messages according to the specified Avro schema"""
    value_schema = avro.schema.Parse(open("schemas/Alert.avsc").read())
    conf = {
        'bootstrap.servers': "temple.di.uoa.gr:9092",
        'schema.registry.url': "http://temple.di.uoa.gr:8081"
    }
    avro_producer = AvroProducer(conf, default_value_schema=value_schema)
    while True:

        # Assemble avro-formatted message filled with generated data
        message = messenger.get_message()

        # Publish the message under the specified topic on the message bus
        avro_producer.produce(topic="RiskAlert", value=message)

        # Flush the buffer
        avro_producer.flush()

        # Wait a second
        time.sleep(1.0)
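
The messenger object is supplied by the caller. A minimal, hypothetical stand-in only needs a get_message() method returning a dict that conforms to schemas/Alert.avsc (whose fields are not shown here):

class StaticMessenger:
    """Hypothetical messenger: replays a fixed payload."""

    def __init__(self, payload):
        self._payload = payload

    def get_message(self):
        # Must return a dict matching schemas/Alert.avsc.
        return dict(self._payload)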
Example #24
def produce_player_messages(context, topic, responses):
    producer = AvroProducer(
        {
            'bootstrap.servers': context.broker,
            'schema.registry.url': context.schema_registry_url
        },
        default_value_schema=context.response_schema)

    count = 0
    for response in responses:
        text = json.dumps(response)
        producer.produce(topic=topic,
                         value={
                             "uid": get_md5(text),
                             "content": text
                         })
        count += 1
        if count % 100 == 0:
            producer.flush()

    producer.flush()
Example #25
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        # NOTE: these are topic-level settings, not producer settings;
        # presumably consumed by create_topic() below.
        self.broker_properties = {
            'cleanup.policy': "delete",
            'delete.retention.ms': 5400000,
            "file.delete.delay.ms": 5400000,
        }
        self.client = AdminClient({"bootstrap.servers": KAFKA_URL})
        topic_metadata = self.client.list_topics(timeout=5)
        for topic in topic_metadata.topics.keys():
            Producer.existing_topics.add(topic)

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        schema_client = CachedSchemaRegistryClient(SCHEMA_REGISTRY_URL)
        self.producer = AvroProducer({"bootstrap.servers": KAFKA_URL},
                                     schema_registry=schema_client)
Example #26
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL
        }

        #Configure the AvroProducer
        self.schema_registry = CachedSchemaRegistryClient(
            {"url": self.broker_properties["schema.registry.url"]})
        self.producer = AvroProducer(
            {"bootstrap.servers": self.broker_properties["bootstrap.servers"]},
            schema_registry=self.schema_registry,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)
            logger.info(f"created topic: {self.topic_name}")
Example #27
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        ## REVIEW ---------------
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            # NOTE: broker.id, log.dirs and zookeeper.connect are broker-side
            # settings; a client only needs bootstrap.servers and the registry URL.
            "broker.id": 0,
            "log.dirs": "/tmp/kafka-logs",
            "zookeeper.connect": ZOOKEEPER_HOST,
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer({'bootstrap.servers': self.broker_properties["bootstrap.servers"],
                                      'schema.registry.url': self.broker_properties["schema.registry.url"]},
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)
Example #28
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        self.broker_properties = {
            "bootstrap.servers": Producer.BROKER_URL,
            "schema.registry.url": Producer.REGISTRY_URL,
            "client.id": f"{Producer.NAMESPACE}.producer",
            "compression.type": "snappy",
            "linger.ms": 5000,
            "batch.num.messages": 1000,
            "queue.buffering.max.messages": 10000,
            "enable.idempotence": True,
            "acks": "all",
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Instantiate Avro Producer
        try:
            self.producer = AvroProducer(self.broker_properties)
            logger.info(f"Created AvroProducer for {self.topic_name}")
        except Exception as e:
            logger.warning(
                f"Failed to instantiate AvroProducer for {self.topic_name}")
            logger.error(f"Error: {e}")
            exit()
Example #29
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "BROKER_URL": "PLAINTEXT://kafka0:19092",
            "SCHEMA_REGISTRY_URL": "http://schema-registry:8081/"
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            {
                "bootstrap.servers": self.broker_properties.get("BROKER_URL"),
                "schema.registry.url": self.broker_properties.get("SCHEMA_REGISTRY_URL")
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)
Example #30
    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        #Initializes a Producer object with basic settings
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        self.replication_factor = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "client.id": "ex4",
            "linger.ms": 1000,
            "compression.type": "lz4",
            "batch.num.messages": 100,
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        schema_registry = CachedSchemaRegistryClient(
            {"url": SCHEMA_REGISTRY_URL})
        self.producer = AvroProducer({"bootstrap.servers": BROKER_URL},
                                     schema_registry=schema_registry)