Example #1
def produce_tweet():
    if 'username' in request.cookies:
        id = request.form['id']
        content = request.form['content']
        location = request.form['location']
        # extract tags and mentions :)
        tags = [h for h in content.split() if h.startswith('#')]
        mentions = [h for h in content.split() if h.startswith('@')]

        value = {
            "author": f"{id}",
            "content": f"{content}",
            "timestamp": f"{time.time()}",
            "location": f"{location}",
            "tags": tags,
            "mentions": mentions
        }
        key = {"name": f"{id}"}

        p = AvroProducer(
            {
                'bootstrap.servers': BOOTSTRAP_SERVERS,
                'enable.idempotence': 'true',  # for EOS: ensures each tweet is sent exactly once
                'schema.registry.url': SCHEMA_REGISTRY_URL
            },
            default_key_schema=KEY_SCHEMA,
            default_value_schema=VALUE_SCHEMA)

        p.produce(topic=TOPIC, value=value, key=key)
        p.flush()
        return 'Tweet published!'
    else:
        return 'Oops, you are not logged in...'
Example #2
def produce(topic, conf):
    """
        Produce User records
    """

    from confluent_kafka.avro import AvroProducer

    producer = AvroProducer(conf, default_value_schema=record_schema)

    print("Producing user records to topic {}. ^c to exit.".format(topic))
    while True:
        # Instantiate new User, populate fields, produce record, execute callbacks.
        record = User()
        try:
            record.name = input("Enter name: ")
            record.favorite_number = int(input("Enter favorite number: "))
            record.favorite_color = input("Enter favorite color: ")

            # The message passed to the delivery callback will already be serialized.
            # To aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic, value=record.to_dict(),
                             callback=lambda err, msg, obj=record: on_delivery(err, msg, obj))
            # Serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
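
This producer loop (repeated in Example #29 below) references a User class, a module-level record_schema, and an on_delivery callback that the snippet does not define. A minimal sketch of those pieces, written as assumptions rather than the original project's code:

from confluent_kafka import avro

# Assumed value schema matching the three fields the loop populates.
record_schema = avro.loads("""{
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": "int"},
        {"name": "favorite_color", "type": "string"}
    ]
}""")

class User:
    # Assumed plain container; to_dict() feeds AvroProducer.produce().
    def __init__(self):
        self.name = None
        self.favorite_number = None
        self.favorite_color = None

    def to_dict(self):
        return {"name": self.name,
                "favorite_number": self.favorite_number,
                "favorite_color": self.favorite_color}

def on_delivery(err, msg, obj):
    # Standard confluent-kafka delivery callback, plus the original record for debugging.
    if err is not None:
        print("Delivery failed for User {}: {}".format(obj.name, err))
    else:
        print("User {} delivered to {} [{}]".format(obj.name, msg.topic(), msg.partition()))
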
Example #3
    def test_produce_with_empty_key_value_with_schema(self):
        value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
        schema_registry = MockSchemaRegistryClient()
        producer = AvroProducer({},
                                schema_registry=schema_registry,
                                default_value_schema=value_schema)
        producer.produce(topic='test', value={'name': 'abc'}, key='')
Example #4
def kafka_producer(topic_name, BROKER_URL, SCHEMA_REGISTRY_URL,
                   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME):
    """
    Kafka Avro Producer, produces events given schema
    """
    # Avro schema
    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/key_schema.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/value_schema.json")
    # Get a handle on s3
    s3 = boto3.resource('s3',
                        aws_access_key_id=AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    # s3_object = s3.Object(bucket_name=S3_BUCKET_NAME, key=f'{OPERATOR}.txt')
    s3_object = s3.Object(bucket_name=S3_BUCKET_NAME, key='Bird.txt')
    streaming_body = s3_object.get()['Body']

    broker_properties = {
        "bootstrap.servers": BROKER_URL,
        "schema.registry.url": SCHEMA_REGISTRY_URL,
        "client.id": "base.producer",
    }

    producer = AvroProducer(
        broker_properties,
        default_key_schema=key_schema,
        default_value_schema=value_schema,
    )

    keys = [
        'bike_id', 'is_disabled', 'is_reserved', 'last_updated', 'lat', 'lon',
        'operator', 'vehicle_type'
    ]
    while True:
        try:
            for ln in codecs.getreader('utf-8')(streaming_body):
                # sl = ln.rstrip().split(",")
                d = dict((x.strip(), y.strip())
                         for x, y in (element.split(':')
                                      for element in ln.split(', ')))
                print(d)
                if set(keys).issubset(d.keys()):
                    producer.produce(
                        topic=topic_name,
                        key={"timestamp": time_millis()},
                        value=asdict(
                            Event(d['bike_id'],
                                  distutils.util.strtobool(d['is_disabled']),
                                  distutils.util.strtobool(d['is_reserved']),
                                  int(d['last_updated']), float(d['lat']),
                                  float(d['lon']), d['operator'],
                                  d['vehicle_type'])),
                        on_delivery=acked)
                time.sleep(2)

        except KeyboardInterrupt:
            break

    producer.flush(timeout=1)
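
Example #4 leans on two helpers it does not define: time_millis() for the record key and an acked delivery callback. A plausible sketch, assuming the key schema carries a single millisecond timestamp field:

import time

def time_millis():
    # Current Unix time in milliseconds, matching the assumed key schema.
    return int(round(time.time() * 1000))

def acked(err, msg):
    # confluent-kafka delivery callback: err is a KafkaError or None.
    if err is not None:
        print("Delivery failed: {}".format(err))
    else:
        print("Delivered to {} [{}]".format(msg.topic(), msg.partition()))
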
Example #5
def send_record():
    key_schema, value_schema = load_avro_schema_from_file()

    producer_config = {
        "bootstrap.servers": 'kafka.qa-aws.intranet..:9092',
        "schema.registry.url": 'http://schema-registry.qa-aws.intranet..:8081'
    }

    producer = AvroProducer(producer_config,
                            default_key_schema=key_schema,
                            default_value_schema=value_schema)

    key = str(uuid.uuid4())
    # value = json.loads(data)

    try:
        producer.produce(topic='fct.dsr.financialservices.loan.Limites',
                         key=key,
                         value=data)
    except Exception as e:
        print(f"Exception while producing record value - {data}: {e}")
    else:
        print(f"Successfully producing record value")

    producer.flush()
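
This snippet calls load_avro_schema_from_file() without defining it. Given how the other examples on this page load schemas, a minimal stand-in (the .avsc file names are assumptions) could be:

from confluent_kafka import avro

def load_avro_schema_from_file():
    # Hypothetical helper; the schema file names are placeholders, not from the source.
    key_schema = avro.load("key_schema.avsc")
    value_schema = avro.load("value_schema.avsc")
    return key_schema, value_schema
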
Example #6
    def test_produce_no_value(self):
        key_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
        producer = AvroProducer(
            {'schema.registry.url': 'http://127.0.0.1:9001'},
            default_key_schema=key_schema)
        with self.assertRaises(ConnectionError):  # nonexistent schema registry
            producer.produce(topic='test', key={"name": 'abc"'})
Example #7
class AvroProducerFacade:
    def __init__(self, name, emit_datum, broker, schema_registry_url):
        self.name = name
        self.emit_datum = emit_datum
        schema = avro.loads(get_schema_def())
        self.producer = AvroProducer(
            {
                'bootstrap.servers': broker,
                'schema.registry.url': schema_registry_url,
                **get_sr_config_from_environment(),
                **get_kafka_config_from_environment(),
            },
            default_key_schema=schema,
            default_value_schema=schema)

    def delivery_callback(self, err, msg):
        if err:
            log.debug("Failed to send from '%s': %s", self.name, err)
            datum = Datum(bad_count=1)
        else:
            datum = Datum(good_count=1)
        self.emit_datum(datum)

    def produce(self, topic, poll_wait=0):
        value = {'name': 'foo'}
        self.producer.produce(topic=topic,
                              callback=self.delivery_callback,
                              key=value,
                              value=value)
        self.producer.poll(poll_wait)

    def close(self):
        self.producer.flush()
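
The facade's callback emits Datum objects through an injected emit_datum function; neither is shown. A minimal sketch of the assumed counter type:

from dataclasses import dataclass

@dataclass
class Datum:
    # Assumed counter record; the callback above sets exactly one field to 1.
    good_count: int = 0
    bad_count: int = 0
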
Example #8
def test_dnn():
    copyfile('research.zip', '/mnt/archives/research.zip')
    value_schema = avro.load('avro_sch/res_prod.json')
    value = {"command": "start", "path": "research.zip", "id": "1"}

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': KAFKA_BROKER_URL,
            'schema.registry.url': 'http://schema_registry:8081'
        },
        default_value_schema=value_schema)

    avroProducer.produce(topic='dnn.data', value=value)
    print("msg produced")

    c = AvroConsumer({
        'bootstrap.servers': "broker:9092",
        'group.id': 'groupid',
        'schema.registry.url': 'http://schema_registry:8081'
    })

    c.subscribe(["dnn.results"])

    while True:
        try:
            print("Start polling")
            msg = c.poll(10)

        except SerializerError as e:
            print("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            continue

        if msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            continue
        msg = msg.value()

        assert isinstance(msg, dict), "Wrong type of msg variable"

        assert msg["code"] == "success", "Inference failed"

        assert 'path' in msg, "No path field in returned message"

        for res in os.listdir(
                os.path.join("/mnt/results/experiments", msg["path"])):
            if os.path.isdir(
                    os.path.join("/mnt/results/experiments", msg["path"],
                                 res)):
                png_files = glob.glob(
                    os.path.join("/mnt/results/experiments", msg["path"], res,
                                 "*.png"))
                assert len(
                    png_files) > 0, "No png output files found for {}".format(
                        res)

        assert len(msg["nods"]) < 5, "Too many nodules found"
        break
Example #9
    def test_produce_with_empty_key_no_schema(self):
        value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
        schema_registry = MockSchemaRegistryClient()
        producer = AvroProducer({}, schema_registry=schema_registry,
                                default_value_schema=value_schema)
        with self.assertRaises(KeySerializerError):
            producer.produce(topic='test', value=0.0, key='')
Example #10
def send_record(args):
    key_schema, value_schema = load_avro_schema_from_file(
        args.key_schema_file, args.value_schema_file)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "schema.registry.url": args.schema_registry,
    }

    producer = AvroProducer(
        producer_config,
        default_key_schema=key_schema,
        default_value_schema=value_schema,
    )

    key = json.loads(args.record_key) if args.record_key else str(uuid.uuid4())
    value = json.loads(args.record_value)

    try:
        producer.produce(topic=args.topic, key=key, value=value)
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {args.topic}: {e}"
        )
    else:
        print(
            f"Successfully produced record value - {value} to topic - {args.topic}"
        )

    producer.flush()
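
The args object here is presumably built with argparse. A hedged wiring, with every flag name inferred from the attributes the function reads (argparse maps --record-value to args.record_value, and so on):

import argparse

def parse_args():
    # Hypothetical CLI; flag names are inferred, not taken from the source.
    parser = argparse.ArgumentParser(description="Produce a single Avro record")
    parser.add_argument("--topic", required=True)
    parser.add_argument("--bootstrap-servers", default="localhost:9092")
    parser.add_argument("--schema-registry", default="http://localhost:8081")
    parser.add_argument("--record-key", default=None)
    parser.add_argument("--record-value", required=True)
    parser.add_argument("--key-schema-file", required=True)
    parser.add_argument("--value-schema-file", required=True)
    return parser.parse_args()
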
Example #11
class QRSProducer(object):
    def __init__(self, **kwargs):
        self.TOPIC = kwargs.get("TOPIC", "db")
        self.schema_registry_url = kwargs.get("SCHEMA_REGISTRY_URL")
        self.logger = kwargs.get("logger", logging.getLogger())
        self.Q = kwargs.get("Q")

        self.bootstrap_servers = kwargs.get("BOOTSTRAP_SERVERS")

        self.producer = AvroProducer(
            {
                'bootstrap.servers': self.bootstrap_servers,
                'on_delivery': self.delivery_report,
                'schema.registry.url': self.schema_registry_url
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema)

    def delivery_report(self, error, message):
        """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
        if error is not None:
            print('Message delivery failed: {}'.format(error))
        else:
            print('Message delivered to {} [{}]'.format(
                message.topic(), message.partition()))

    def produce(self, key, value):
        self.producer.produce(topic=self.TOPIC, value=value, key=key)
        self.producer.flush()
Example #12
class vroducer():
    def __init__(self, avro_schema, BOOTSTRAP_SERVERS, SCHEMA_REGISTRY_PATH):

        self.avroProducer = AvroProducer(
            {
                'bootstrap.servers': BOOTSTRAP_SERVERS,
                'on_delivery': self.delivery_report,
                'schema.registry.url': SCHEMA_REGISTRY_PATH
            },
            default_value_schema=avro_schema)

        self.logger = logging.getLogger("VRODUCER")

    def produce_message(self, topic_name, message):
        self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def produce_message_bulk(self, topic_name, message_list):
        for message in message_list:
            self.avroProducer.produce(topic=topic_name, value=message)
        self.avroProducer.flush()

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            self.logger.error('Message delivery failed: {}'.format(err))
        else:
            self.logger.info('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))
Example #13
    def produce(
        self,
        http_check_result: HttpCheckResult,
    ):
        key_schema = avro.load(self.config.avro_key_schema)
        value_schema = avro.load(self.config.avro_value_schema)

        p = AvroProducer(
            {
                "bootstrap.servers": self.config.broker,
                "on_delivery": self.delivery_report,
                "schema.registry.url": self.config.schema_registry_url,
            },
            default_key_schema=key_schema,
            default_value_schema=value_schema,
        )
        key = {"timestamp": http_check_result.timestamp.isoformat()}
        value = {
            "status_code": http_check_result.status_code,
            "matches_regex": http_check_result.match_regex,
            "response_time_seconds": http_check_result.response_time_seconds,
        }
        logger.debug(f"Producing message: {key} {value}")
        p.produce(topic=self.config.topic, value=value, key=key)
        p.flush()
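
HttpCheckResult is not shown; its shape can be inferred from the key and value dicts built above. A minimal assumed definition:

from dataclasses import dataclass
from datetime import datetime
from typing import Optional

@dataclass
class HttpCheckResult:
    # Fields inferred from the produce() call; anything beyond that is an assumption.
    timestamp: datetime
    status_code: int
    match_regex: Optional[bool]
    response_time_seconds: float
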
Example #14
class Collector:

	def __init__(self):
		self._logger = logging.getLogger('gunicorn.error')

		value_schema = avro.loads(value_schema_str)
		key_schema = avro.loads(key_schema_str)

		self._producer = AvroProducer({
			'bootstrap.servers': f'{os.getenv("BROKER_HOST")}:9092',
			'schema.registry.url': f'http://{os.getenv("SCHEMA_REGISTRY_HOST")}:8081',
			'on_delivery': self._delivery_report
			}, default_key_schema=key_schema, default_value_schema=value_schema)

	def collect_phrase(self, phrase):
		phrase = phrase.lower().translate({ord(i): None for i in '|'})  # Remove the pipe character, which is treated as a special character in this system
		self._producer.produce(topic='phrases', value={"phrase": phrase}, key={"phrase": phrase})
		self._producer.flush()

	def _delivery_report(self, err, msg):
		""" Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """
		if err is not None:
			self._logger.error('Message delivery to broker failed: {}'.format(err))
		else:
			self._logger.info('Message delivered to broker on {} [{}]'.format(msg.topic(), msg.partition()))
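
The value_schema_str and key_schema_str globals the constructor parses are not shown. Since both the key and the value passed to produce() carry a single phrase string, plausible definitions are:

# Assumed schema strings; record names are invented for illustration.
key_schema_str = """{
    "type": "record",
    "name": "phrase_key",
    "fields": [{"name": "phrase", "type": "string"}]
}"""

value_schema_str = """{
    "type": "record",
    "name": "phrase_value",
    "fields": [{"name": "phrase", "type": "string"}]
}"""
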
Example #15
def write_tweets(tweets, filename):
    ''' Produce tweets to a Kafka topic; the filename parameter is unused. '''
    value_schema = avro.load('ValueSchema.avsc')
    key_schema = avro.load('KeySchema.avsc')

    avroProducer = AvroProducer(
        {'bootstrap.servers': '172.27.146.20:9092', 'schema.registry.url': 'http://172.27.146.20:8081'},
        default_key_schema=key_schema, default_value_schema=value_schema)

    for tweet in tweets:
        jsonObj = json.loads(json.dumps(tweet._json))
        created_at = jsonObj['created_at']
        id_str = jsonObj['id_str']
        name = jsonObj['user']['name']
        screen_name = jsonObj['user']['screen_name']
        text = jsonObj['text']

        key = {"id_str": id_str}
        value = {"id_str": id_str, "created_at": created_at, "name": name, "screen_name": screen_name, "text": text}
        avroProducer.produce(topic='bigData1', value=value, key=key, key_schema=key_schema, value_schema=value_schema)
        print(value)
        sleep(0.01)

    avroProducer.flush(10)
Example #16
def produce():
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)

    def delivery_report(err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            print('Message delivery failed: {}'.format(err))
        else:
            print('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    avro_producer = AvroProducer(
        {
            'bootstrap.servers': config.BOOTSTRAP_SERVERS,
            'on_delivery': delivery_report,
            'schema.registry.url': config.SCHEMA_REGISTRY_URL
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    cluster_metadata = avro_producer.list_topics()
    if TOPIC_NAME not in cluster_metadata.topics.keys():
        for name in most_common_names_usa:
            value = {"rank": name[0], "name": name[1], "data": name[2]}
            key = {"rank": name[0]}
            avro_producer.produce(topic=TOPIC_NAME, value=value, key=key)
        avro_producer.flush()
    else:
        print(f"{TOPIC_NAME} exists, do nothing")
Example #17
def produce_test_messages_with_avro(
        avro_producer: AvroProducer,
        topic: Tuple[str, int]) -> Iterable[KafkaMessage]:
    topic_name, num_partitions = topic
    with open("tests/test_samples/key_schema.avsc", "r") as file:
        key_schema = load_schema(file.read())
    with open("tests/test_samples/value_schema.avsc", "r") as file:
        value_schema = load_schema(file.read())
    messages = []
    for i in range(10):
        partition = random.randrange(0, num_partitions)
        key = {"id": str(i)}
        value = {"first": "Firstname", "last": "Lastname"}
        messages.append(
            KafkaMessage(json.dumps(key), json.dumps(value), partition,
                         key_schema, value_schema))
        avro_producer.produce(
            topic=topic_name,
            key=key,
            value=value,
            key_schema=key_schema,
            value_schema=value_schema,
            partition=partition,
        )
        avro_producer.flush()
    return messages
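
KafkaMessage is constructed positionally with a JSON key, a JSON value, a partition, and the two schemas. An assumed definition consistent with that call:

from typing import Any, NamedTuple, Optional

class KafkaMessage(NamedTuple):
    # Shape inferred from the constructor call in the test above.
    key: str
    value: str
    partition: int
    key_schema: Optional[Any] = None
    value_schema: Optional[Any] = None
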
Example #18
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    #
    # TODO: Create a CachedSchemaRegistryClient. Use SCHEMA_REGISTRY_URL.
    #       See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/cached_schema_registry_client.py#L47
    #
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    #
    # TODO: Replace with an AvroProducer.
    #       See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
    #
    p = AvroProducer({"bootstrap.servers": BROKER_URL},
                     schema_registry=schema_registry)
    while True:
        #
        # TODO: Replace with an AvroProducer produce. Make sure to specify the schema!
        #       Tip: Make sure to serialize the ClickEvent with `asdict(ClickEvent())`
        #       See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
        #
        p.produce(
            topic=topic_name,
            value=asdict(ClickEvent()),
            # TODO: Supply schema
            value_schema=ClickEvent.schema)
        await asyncio.sleep(1.0)
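
ClickEvent must be a dataclass (it is serialized with asdict) carrying a class-level Avro schema. A sketch under those assumptions; the field names are invented for illustration:

import random
from dataclasses import dataclass, field
from confluent_kafka import avro

@dataclass
class ClickEvent:
    # Hypothetical event; asdict() turns it into the dict the producer needs.
    email: str = field(default_factory=lambda: "user{}@example.com".format(random.randint(0, 999)))
    uri: str = "/index.html"

    # Unannotated, so the dataclass machinery leaves it as a plain class attribute.
    schema = avro.loads("""{
        "type": "record",
        "name": "click_event",
        "fields": [
            {"name": "email", "type": "string"},
            {"name": "uri", "type": "string"}
        ]
    }""")
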
Example #19
    def __init__(self, clf, input_shape, y_train): # input_shape: x_train.shape[1:]
        self.searcher_args = {}
        clf.y_encoder = OneHotEncoder()
        clf.y_encoder.fit(y_train)

        self.searcher_args['n_output_node'] = clf.get_n_output_node()
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = clf.path
        self.searcher_args['metric'] = clf.metric
        self.searcher_args['loss'] = clf.loss
        self.searcher_args['verbose'] = clf.verbose
        super().__init__(**self.searcher_args)
        clf.save_searcher(self)
        clf.searcher = True
        if publish:
            # Not the best solution, but I want the code to be testable with pipenv shell in the git directory of autokeras!
            from confluent_kafka import avro
            from confluent_kafka.avro import AvroProducer
 
            value = avro.loads(value_schema)
            key = avro.loads(key_schema)

            global producer
            producer = AvroProducer({
                'bootstrap.servers': os.environ.get("BROKER", "95.158.189.52:9092"), 
                'schema.registry.url': os.environ.get("SCHEMA_REGISTRY", "http://95.158.189.52:8081"),
                'message.max.bytes': 15728640
            }, default_key_schema=key, default_value_schema=value)

            producer.produce(topic="autokeras-queen-1", key={"loss": 0, "accuracy": 0}, value={"model": b"Starting to produce models"})
            
            print("Will publish to kafka")
Example #20
class KafkaClient:
    """
    client for publishing vectorization results to kafka
    """
    def __init__(self,
                 schema_registry='http://127.0.0.1:8081',
                 bootstrap_servers='localhost:9092',
                 topic='paintings'):
        self.painting_schema = avro.load('../avro/painting.avsc')
        self.painting_key_schema = avro.load('../avro/painting.key.avsc')
        self.topic = topic
        self.avro_producer = AvroProducer(
            {
                'bootstrap.servers': bootstrap_servers,
                'schema.registry.url': schema_registry,
                'default.topic.config': {
                    'acks': 'all'
                }
            },
            default_value_schema=self.painting_schema,
            default_key_schema=self.painting_key_schema)

    def submit(self, vectorized_img):
        if isinstance(vectorized_img, VectorizedImage):
            value = vectorized_img.to_dict()
            self.avro_producer.produce(
                topic=self.topic,
                key={'filename': vectorized_img.filename},
                value=value)
        else:
            raise Exception("vectorized image must be an instance of " +
                            VectorizedImage.__name__)

    def flush(self):
        self.avro_producer.flush()
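
VectorizedImage only needs a filename attribute and a to_dict() method for this client. An assumed minimal shape:

from dataclasses import asdict, dataclass
from typing import List

@dataclass
class VectorizedImage:
    # filename is required by submit(); the vector field is an assumption.
    filename: str
    vector: List[float]

    def to_dict(self):
        return asdict(self)
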
Example #21
    def test_produce_with_custom_registry(self):
        schema_registry = MockSchemaRegistryClient()
        value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
        key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
        producer = AvroProducer({}, schema_registry=schema_registry)
        producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey',
                         key_schema=key_schema)
Example #22
class MyAvroProducer():
    def __init__(self, schema_name, topic):
        kafka_cfg = parse_kafka_config()
        key_schema, value_schema = load_avro_schema_from_registry(
            schema_name, kafka_cfg['schema-registry-url'])

        producer_config = {
            "bootstrap.servers": kafka_cfg['bootstrap-servers'],
            "schema.registry.url": kafka_cfg['schema-registry-url']
        }

        self.topic = topic
        self.producer = AvroProducer(producer_config,
                                     default_key_schema=key_schema,
                                     default_value_schema=value_schema)

    def send_record(self, record_value, record_key=None):

        key = record_key if record_key else str(uuid.uuid4())
        value = json.loads(record_value)

        try:
            self.producer.produce(topic=self.topic, key=key, value=value)
        except Exception as e:
            print(
                f"Exception while producing record value - {value} to topic - {self.topic}: {e}"
            )
        else:
            print(
                f"Successfully produced record value - {value} to topic - {self.topic}"
            )

        self.producer.flush()
Example #23
    def test_produce_primitive_string_key(self):
        value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
        key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
        producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
        with self.assertRaises(ConnectionError):  # nonexistent schema registry
            producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey',
                             key_schema=key_schema)
Example #24
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    #
    # TODO: Create a CachedSchemaRegistryClient. Use SCHEMA_REGISTRY_URL.
    #       See: https://github.com/confluentinc/confluent-kafka-python/blob/master/confluent_kafka/avro/cached_schema_registry_client.py#L47
    #
    # schema_registry = TODO

    #
    # TODO: Replace with an AvroProducer.
    #       See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
    #
    p = AvroProducer(broker_config,
                     default_value_schema=Turnstile.avro_schema,
                     default_key_schema=Turnstile.default_key_schema)
    while True:
        #
        # TODO: Replace with an AvroProducer produce. Make sure to specify the schema!
        #       Tip: Make sure to serialize the ClickEvent with `asdict(ClickEvent())`
        #       See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroProducer
        #
        p.produce(topic=topic_name,
                  value=asdict(Turnstile()),
                  key=str(int(time.time() * 1000)))
        await asyncio.sleep(1.0)
Example #25
    def send_to_kafka():
        Timer(10.0, send_to_kafka).start()
        try:
            print("running")
            avro_producer = AvroProducer(
                {
                    'bootstrap.servers': 'up01:9092,up02:9092,up03:9092',
                    'schema.registry.url': 'http://up04:8081'
                },
                default_key_schema=key_schema,
                default_value_schema=value_schema)

            value = read_from_sense_hat()

            print(value)

            avro_producer.poll(0)

            avro_producer.produce(topic='test_avro_2',
                                  value=value,
                                  key=key,
                                  callback=delivery_report)
            avro_producer.flush()

        except Exception:
            logging.error(traceback.format_exc())
Example #26
File: mce_cli.py  Project: yhjyoon/datahub
def produce(conf, data_file, schema_record):
    """
        Produce MetadataChangeEvent records
    """
    producer = AvroProducer(conf,
                            default_value_schema=avro.load(schema_record))

    print("Producing MetadataChangeEvent records to topic {}. ^c to exit.".
          format(topic))

    with open(data_file) as fp:
        cnt = 0
        while True:
            sample = fp.readline()
            cnt += 1
            if not sample:
                break
            try:
                content = ast.literal_eval(sample.strip())
                producer.produce(topic=topic, value=content)
                producer.poll(0)
                print("  MCE{}: {}".format(cnt, sample))
            except KeyboardInterrupt:
                break
            except ValueError as e:
                print("Message serialization failed {}".format(e))
                break

    print("Flushing records...")
    producer.flush()
Example #27
class KafkaAvroMessageProducer(object):
    def __init__(self, args):
        self.args = args

        self.avro_producer = AvroProducer(
            {
                'bootstrap.servers': self.args.brokers,
                'schema.registry.url': self.args.registry
            },
            default_key_schema=avro.loads(self.args.keyschema),  # key schema
            default_value_schema=avro.loads(self.args.schema)  # value schema
        )

    def produce(self):
        if self.args.input is None:
            # interactive
            for line in sys.stdin:
                clean_line = line.strip()
                if not clean_line:
                    break
                self.produce_one(clean_line, flush=True)
        else:
            with open(self.args.input, 'r') as f:
                for line in f:
                    self.produce_one(line)
            self.avro_producer.flush()

    def produce_one(self, line, flush=False):
        key, value = line.split(self.args.separator)
        self.avro_producer.produce(topic=self.args.topic,
                                   key=json.loads(key),
                                   value=json.loads(value))
        if flush:
            self.avro_producer.flush()
Example #28
def send_record(args):
    if args.record_value is None:
        raise AttributeError("--record-value is not provided.")

    if args.schema_file is None:
        raise AttributeError("--schema-file is not provided.")

    key_schema, value_schema = load_avro_schema_from_file(args.schema_file)

    producer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "schema.registry.url": args.schema_registry
    }

    producer = AvroProducer(producer_config,
                            default_key_schema=key_schema,
                            default_value_schema=value_schema)

    key = args.record_key if args.record_key else str(uuid.uuid4())
    value = json.loads(args.record_value)

    try:
        producer.produce(topic=args.topic, key=key, value=value)
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {args.topic}: {e}"
        )
    else:
        print(
            f"Successfully produced record value - {value} to topic - {args.topic}"
        )

    producer.flush()
Example #29
def produce(topic, conf):
    """
        Produce User records
    """

    from confluent_kafka.avro import AvroProducer

    producer = AvroProducer(conf, default_value_schema=record_schema)

    print("Producing user records to topic {}. ^c to exit.".format(topic))
    while True:
        # Instantiate new User, populate fields, produce record, execute callbacks.
        record = User()
        try:
            record.name = input("Enter name: ")
            record.favorite_number = int(input("Enter favorite number: "))
            record.favorite_color = input("Enter favorite color: ")

            # The message passed to the delivery callback will already be serialized.
            # To aid in debugging we provide the original object to the delivery callback.
            producer.produce(topic=topic,
                             value=record.to_dict(),
                             callback=lambda err, msg, obj=record: on_delivery(
                                 err, msg, obj))
            # Serve on_delivery callbacks from previous asynchronous produce()
            producer.poll(0)
        except KeyboardInterrupt:
            break
        except ValueError:
            print("Invalid input, discarding record...")
            continue

    print("\nFlushing records...")
    producer.flush()
Example #30
    def test_produce_arguments_list(self):
        producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:0'})
        try:
            producer.produce(topic='test', value={"name": 'abc"'}, key='mykey')
        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            if exc_type.__name__ == 'SerializerError':
                pass
Example #31
    def test_produce_with_empty_key_value_with_schema(self):
        key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
        value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
        schema_registry = MockSchemaRegistryClient()
        producer = AvroProducer({}, schema_registry=schema_registry,
                                default_key_schema=key_schema,
                                default_value_schema=value_schema)
        producer.produce(topic='test', value=0.0, key='')
Example #32
    def test_produce_with_custom_registry(self):
        schema_registry = MockSchemaRegistryClient()
        value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
        producer = AvroProducer({}, schema_registry=schema_registry)
        producer.produce(topic='test',
                         value={"name": 'abc"'},
                         value_schema=value_schema,
                         key='mykey')
Example #33
    def produce(self, preparedMessageArray):
        prodConf = self.producerConfig()
        producer = AvroProducer(prodConf, default_value_schema=self.avroSchema)
        for preparedMessage in preparedMessageArray:

            producer.produce(topic=self.getTopic(), value=preparedMessage.to_dict(),
                             callback=lambda err, msg, obj=preparedMessage: self.on_delivery(err, msg, obj))
        producer.flush()
Example #34
async def produce(broker_url, topic, schema_registry_url, *, num_messages):
    schema_registry = CachedSchemaRegistryClient({"url": schema_registry_url})
    conf = {"bootstrap.servers": broker_url, "client.id": socket.gethostname()}
    p = AvroProducer(conf, schema_registry=schema_registry)
    for _ in range(num_messages):
        p.produce(topic=topic,
                  value=asdict(ClickEvent()),
                  value_schema=ClickEvent.schema)
        await asyncio.sleep(1.0)
Example #35
def produce_asgard_message(context, topic, sentiment_message):
    producer = AvroProducer(
        {
            'bootstrap.servers': context.broker,
            'schema.registry.url': context.schema_registry_url
        },
        default_value_schema=context.sentiment_schema)
    producer.produce(topic=topic, value=sentiment_message)
    producer.flush()
Example #36
    def test_produce_no_value_schema(self):
        producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
        with self.assertRaises(ValueSerializerError):
            # Producer should not accept a value with no schema
            producer.produce(topic='test', value={"name": 'abc"'})
Example #37
    def test_produce_no_key_schema(self):
        producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'})
        with self.assertRaises(KeySerializerError):
            # If the key is provided as a dict an avro schema must also be provided
            producer.produce(topic='test', key={"name": 'abc"'})
Example #38
    def test_produce_value_and_key_schemas(self):
        value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
        producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, default_value_schema=value_schema,
                                default_key_schema=value_schema)
        with self.assertRaises(ConnectionError):  # nonexistent schema registry
            producer.produce(topic='test', value={"name": 'abc"'}, key={"name": 'abc"'})
Example #39
import random

SCHEMA_REGISTRY_URL = 'http://172.17.0.5:8081'
BOOTSTRAP_SERVERS = '172.17.0.4'

AVSC_DIR = os.path.dirname(os.path.realpath(__file__))
KEY_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'primitive_string.avsc'))
VALUE_SCHEMA = avro.load(os.path.join(AVSC_DIR, 'basic_schema.avsc'))

TOPIC = 'avrotopic'
KEY = "mykey"

avroProducer = AvroProducer({'bootstrap.servers': BOOTSTRAP_SERVERS,
                             'schema.registry.url': SCHEMA_REGISTRY_URL},
                            default_key_schema=KEY_SCHEMA,
                            default_value_schema=VALUE_SCHEMA)


for i in range(100):
    value = {"name": generate_words(count=1),
             "surname": generate_words(count=2),
             "number": random.randint(0, 100)}

    print(value)

    avroProducer.produce(topic=TOPIC,
                         value=value,
                         key=KEY)


avroProducer.flush()
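
generate_words is undefined in the script above; it may come from a lorem-ipsum helper. A self-contained stand-in, purely as an assumption:

import random
import string

def generate_words(count=1):
    # Returns `count` random lowercase "words" joined by spaces.
    return " ".join(
        "".join(random.choice(string.ascii_lowercase) for _ in range(random.randint(3, 8)))
        for _ in range(count)
    )
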
Example #40
    def test_produce_with_empty_value_no_schema(self):
        schema_registry = MockSchemaRegistryClient()
        producer = AvroProducer({}, schema_registry=schema_registry)
        with self.assertRaises(ValueSerializerError):
            producer.produce(topic='test', value='', key='not empty')