def __init__(self, conf):
    self.producer = Producer(conf)
import os

from config import Config
from confluent_kafka import Producer
from slack import WebClient
from slack.errors import SlackApiError

# Bot User OAuth Access Token
# used scopes: channels:history, channels:read, chat:write, im:history, mpim:history, users:read
token = os.environ["SLACK_BOT_TOKEN"]

# Initialize the Slack API client
sc = WebClient(token)

# Create the Kafka producer ("localhost:9092")
settings = {"bootstrap.servers": Config.MY_SERVER}
p = Producer(settings)


def acked(err, msg):  # delivery callback
    if err is not None:
        print("Failed to deliver message: {0}: {1}".format(msg.value(), err.str()))
    else:
        print("Message produced: {0}".format(msg.value()))  # binary value


channel = "C01FVD0QD42"  # look up the id with sc.conversations_list below
# channel_name = "일반"
# try:
#     sc_response = sc.conversations_list(channel=channel)
def initProducer(self):
    if "kafka_producer" in self.config:
        self.kafkaProducer = Producer(self.config["kafka_producer"])
    else:
        raise Exception("Cannot find Kafka Producer configuration.")
def __init__(
    self,
    storage_key: StorageKey,
    raw_topic: Optional[str],
    replacements_topic: Optional[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    bootstrap_servers: Sequence[str],
    group_id: str,
    commit_log_topic: Optional[str],
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    processes: Optional[int],
    input_block_size: Optional[int],
    output_block_size: Optional[int],
    commit_retry_policy: Optional[RetryPolicy] = None,
    profile_path: Optional[str] = None,
) -> None:
    self.storage = get_writable_storage(storage_key)
    self.bootstrap_servers = bootstrap_servers
    stream_loader = self.storage.get_table_writer().get_stream_loader()

    self.raw_topic: Topic
    if raw_topic is not None:
        self.raw_topic = Topic(raw_topic)
    else:
        self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)

    self.replacements_topic: Optional[Topic]
    if replacements_topic is not None:
        self.replacements_topic = Topic(replacements_topic)
    else:
        replacement_topic_spec = stream_loader.get_replacement_topic_spec()
        if replacement_topic_spec is not None:
            self.replacements_topic = Topic(replacement_topic_spec.topic_name)
        else:
            self.replacements_topic = None

    self.commit_log_topic: Optional[Topic]
    if commit_log_topic is not None:
        self.commit_log_topic = Topic(commit_log_topic)
    else:
        commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
        if commit_log_topic_spec is not None:
            self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
        else:
            self.commit_log_topic = None

    # XXX: This can result in a producer being built in cases where it's
    # not actually required.
    self.producer = Producer({
        "bootstrap.servers": ",".join(self.bootstrap_servers),
        "partitioner": "consistent",
        "message.max.bytes": 50000000,  # 50MB, default is 1MB
    })

    self.metrics = MetricsWrapper(
        environment.metrics,
        "consumer",
        tags={"group": group_id, "storage": storage_key.value},
    )

    self.max_batch_size = max_batch_size
    self.max_batch_time_ms = max_batch_time_ms
    self.group_id = group_id
    self.auto_offset_reset = auto_offset_reset
    self.queued_max_messages_kbytes = queued_max_messages_kbytes
    self.queued_min_messages = queued_min_messages
    self.processes = processes
    self.input_block_size = input_block_size
    self.output_block_size = output_block_size
    self.__profile_path = profile_path

    if commit_retry_policy is None:
        commit_retry_policy = BasicRetryPolicy(
            3,
            constant_delay(1),
            lambda e: isinstance(e, KafkaException)
            and e.args[0].code()
            in (
                KafkaError.REQUEST_TIMED_OUT,
                KafkaError.NOT_COORDINATOR,
                KafkaError._WAIT_COORD,
            ),
        )
    self.__commit_retry_policy = commit_retry_policy
from confluent_kafka import Producer
from faker import Faker
import json

p = Producer({
    'bootstrap.servers': 'b-2.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092,b-1.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092,b-3.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092'
})


def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


fake = Faker('en_US')


def gen_ran_data(i):
    data = {}
    data["ID"] = i
    data["name"] = fake.name()
    data["address"] = fake.address()
    data["Email-ID"] = fake.safe_email()
    return data
def __init__(self, conf, topic_name):
    self.topic_name = topic_name
    self.producer = Producer(conf)
    self.counter = 0
    self.running = True
                days=1)
        if update_order_date_modified > orders_dict_date_modified and update_order not in r_new:
            # Check if > condition is enough (it was >= before)
            dict_update_orders[update_order_id] = {
                "order_id": update_order_id,
                "date_created": update_order['date_created_gmt'],
                "date_modified": update_order['date_modified_gmt']
            }
    orders = dict_new_orders.copy()
    orders.update(dict_update_orders)
    return orders, date_created


if __name__ == '__main__':
    create_tables()
    p = Producer({'bootstrap.servers': 'localhost:9092,localhost:9093'})
    date_created, date_updated = get_last_updated_at()
    sleep_time = 3
    loop_value = 0
    try:
        while True:
            orders_dict = []
            loop_value += 1
            if loop_value >= 5:  # Load orders only every 5th iteration
                loop_value = -1
                orders_dict = get_orders_dict()
            # print('Sleeping for {0} seconds...'.format(sleep_time))
            time.sleep(sleep_time)
            orders, date_created = get_woocommerce_orders(date_created, date_updated, orders_dict)
def run(self):
    def fail_fast(err, msg):
        if err is not None:
            print("Kafka producer delivery error: {}".format(err))
            print("Bailing out...")
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)

    def on_commit(err, partitions):
        if err is not None:
            print("Kafka consumer commit error: {}".format(err))
            print("Bailing out...")
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)
        for p in partitions:
            # check for partition-specific commit errors
            print(p)
            if p.error:
                print("Kafka consumer commit error: {}".format(p.error))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(p.error)
        print("Kafka consumer commit successful")
        pass

    def on_rebalance(consumer, partitions):
        for p in partitions:
            if p.error:
                raise KafkaException(p.error)
        print("Kafka partitions rebalanced: {} / {}".format(consumer, partitions))

    consumer_conf = self.kafka_config.copy()
    consumer_conf.update({
        'group.id': self.consumer_group,
        'on_commit': fail_fast,
        # messages don't have offset marked as stored until pushed to
        # elastic, but we do auto-commit stored offsets to broker
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        # user code timeout; if no poll after this long, assume user code
        # hung and rebalance (default: 5min)
        'max.poll.interval.ms': 180000,
        'default.topic.config': {
            'auto.offset.reset': 'latest',
        },
    })
    consumer = Consumer(consumer_conf)

    producer_conf = self.kafka_config.copy()
    producer_conf.update({
        'delivery.report.only.error': True,
        'default.topic.config': {
            'request.required.acks': -1,  # all brokers must confirm
        },
    })
    producer = Producer(producer_conf)

    consumer.subscribe(
        [self.consume_topic],
        on_assign=on_rebalance,
        on_revoke=on_rebalance,
    )
    print("Kafka consuming {}".format(self.consume_topic))

    while True:
        msg = consumer.poll(self.poll_interval)
        if not msg:
            print("nothing new from kafka (poll_interval: {} sec)".format(self.poll_interval))
            continue
        if msg.error():
            raise KafkaException(msg.error())

        cle = json.loads(msg.value().decode('utf-8'))
        #print(cle)
        print("processing changelog index {}".format(cle['index']))
        release_ids = []
        new_release_ids = []
        file_ids = []
        fileset_ids = []
        webcapture_ids = []
        container_ids = []
        work_ids = []
        release_edits = cle['editgroup']['edits']['releases']
        for re in release_edits:
            release_ids.append(re['ident'])
            # filter to direct release edits which are not updates
            if not re.get('prev_revision') and not re.get('redirect_ident'):
                new_release_ids.append(re['ident'])
        file_edits = cle['editgroup']['edits']['files']
        for e in file_edits:
            file_ids.append(e['ident'])
        fileset_edits = cle['editgroup']['edits']['filesets']
        for e in fileset_edits:
            fileset_ids.append(e['ident'])
        webcapture_edits = cle['editgroup']['edits']['webcaptures']
        for e in webcapture_edits:
            webcapture_ids.append(e['ident'])
        container_edits = cle['editgroup']['edits']['containers']
        for e in container_edits:
            container_ids.append(e['ident'])
        work_edits = cle['editgroup']['edits']['works']
        for e in work_edits:
            work_ids.append(e['ident'])

        # TODO: do these fetches in parallel using a thread pool?
        for ident in set(file_ids):
            file_entity = self.api.get_file(ident, expand=None)
            # update release when a file changes
            # TODO: also fetch old version of file and update any *removed*
            # release idents (and same for filesets, webcapture updates)
            release_ids.extend(file_entity.release_ids or [])
            file_dict = self.api.api_client.sanitize_for_serialization(file_entity)
            producer.produce(
                self.file_topic,
                json.dumps(file_dict).encode('utf-8'),
                key=ident.encode('utf-8'),
                on_delivery=fail_fast,
            )

        # TODO: topic for fileset updates
        for ident in set(fileset_ids):
            fileset_entity = self.api.get_fileset(ident, expand=None)
            # update release when a fileset changes
            release_ids.extend(fileset_entity.release_ids or [])

        # TODO: topic for webcapture updates
        for ident in set(webcapture_ids):
            webcapture_entity = self.api.get_webcapture(ident, expand=None)
            # update release when a webcapture changes
            release_ids.extend(webcapture_entity.release_ids or [])

        for ident in set(container_ids):
            container = self.api.get_container(ident)
            container_dict = self.api.api_client.sanitize_for_serialization(container)
            producer.produce(
                self.container_topic,
                json.dumps(container_dict).encode('utf-8'),
                key=ident.encode('utf-8'),
                on_delivery=fail_fast,
            )

        for ident in set(release_ids):
            release = self.api.get_release(ident, expand="files,filesets,webcaptures,container")
            if release.work_id:
                work_ids.append(release.work_id)
            release_dict = self.api.api_client.sanitize_for_serialization(release)
            producer.produce(
                self.release_topic,
                json.dumps(release_dict).encode('utf-8'),
                key=ident.encode('utf-8'),
                on_delivery=fail_fast,
            )
            # for ingest requests, filter to "new" active releases with no matched files
            if release.ident in new_release_ids:
                ir = release_ingest_request(release, ingest_request_source='fatcat-changelog')
                if ir and not release.files and self.want_live_ingest(release, ir):
                    producer.produce(
                        self.ingest_file_request_topic,
                        json.dumps(ir).encode('utf-8'),
                        #key=None,
                        on_delivery=fail_fast,
                    )

        # send work updates (just ident and changelog metadata) to scholar for re-indexing
        for ident in set(work_ids):
            assert ident
            key = f"work_{ident}"
            work_ident_dict = dict(
                key=key,
                type="fatcat_work",
                work_ident=ident,
                updated=cle['timestamp'],
                fatcat_changelog_index=cle['index'],
            )
            producer.produce(
                self.work_ident_topic,
                json.dumps(work_ident_dict).encode('utf-8'),
                key=key.encode('utf-8'),
                on_delivery=fail_fast,
            )

        producer.flush()
        # TODO: publish updated 'work' entities to a topic
        consumer.store_offsets(message=msg)
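# A possible follow-up to the "fetch in parallel" TODO above (not part of the original
# code): a small helper built on the standard-library ThreadPoolExecutor. The
# fetch_entity argument is a hypothetical callable wrapping one of the blocking
# self.api.get_* calls used above.
from concurrent.futures import ThreadPoolExecutor


def fetch_all(fetch_entity, idents, max_workers=8):
    """Fetch entities for a set of idents concurrently; results follow the input order."""
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # pool.map re-raises any exception from a worker thread when its result is consumed
        return list(pool.map(fetch_entity, set(idents)))


# usage sketch: file_entities = fetch_all(lambda i: self.api.get_file(i, expand=None), file_ids)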
def lambda_handler(event, context):
    covid19_api_raw_data_url = os.getenv('COVID19_API_RAW_DATA_URL')
    covid19_api_state_data_url = os.getenv('COVID19_API_STATE_DATA_URL')
    covid19_api_test_data_url = os.getenv('COVID19_API_TEST_DATA_URL')
    bootstrap_servers = os.getenv('BOOTSTRAP_SERVERS')
    kafka_client_id = os.getenv('KAFKA_CLIENT_ID')
    kafka_patient_data_topic_name = os.getenv('KAFKA_PATIENT_DATA_TOPIC_NAME')
    kafka_state_data_topic_name = os.getenv('KAFKA_STATE_DATA_TOPIC_NAME')
    kafka_test_data_topic_name = os.getenv('KAFKA_TEST_DATA_TOPIC_NAME')
    telegram_bot_token = os.getenv('TELEGRAM_BOT_TOKEN')
    telegram_chat_id = os.getenv('TELEGRAM_CHAT_ID')

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'client.id': kafka_client_id,
        'linger.ms': '1000'
    }
    producer = Producer(conf, logger=logger)
    bot = Bot(token=telegram_bot_token)

    # import raw patient data from API
    resp = requests.get(url=covid19_api_raw_data_url)
    data = resp.json()

    # raw_data3.json starting point (27/04/2020)
    # patient number no longer represents a meaningful number. Just another id, treat it like that.
    patient_number = 27892
    for p in data['raw_data']:
        try:
            producer.produce(topic=kafka_patient_data_topic_name,
                             value=json.dumps(p),
                             key=str(patient_number),
                             on_delivery=acked)
            patient_number += 1
        except BufferError:
            logger.error(
                '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                % len(producer))
        producer.poll(0)

    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()
    raw_data_count = len(data['raw_data'])

    # import statewise testing data from API
    resp = requests.get(url=covid19_api_test_data_url)
    data = resp.json()

    test_data_count = 0
    old = None
    for p in data['states_tested_data']:
        try:
            if p['totaltested'] == '':
                continue
            key = dict({u'state': p['state'], u'date': p['updatedon']})
            if old is not None and p['state'] == old['state']:
                p['testreportedtoday'] = str(int(p['totaltested']) - int(old['totaltested']))
                if p['positive'] and old['positive']:  # this data can be missing
                    p['positivereportedtoday'] = str(int(p['positive']) - int(old['positive']))
                else:
                    p['positivereportedtoday'] = ""
            else:
                p['testreportedtoday'] = p['totaltested']
                if p['positive']:  # this data can be missing
                    p['positivereportedtoday'] = p['positive']
                else:
                    p['positivereportedtoday'] = ""
            test_data_count += 1
            old = p
            producer.produce(topic=kafka_test_data_topic_name,
                             value=json.dumps(p),
                             key=json.dumps(key),
                             on_delivery=acked)
        except BufferError:
            logger.error(
                '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                % len(producer))
        producer.poll(0)

    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()

    bot.send_message(
        chat_id=telegram_chat_id,
        text='Imported {} patients and {} testing data into Kafka'.format(
            raw_data_count, test_data_count))
from confluent_kafka import Producer

p = Producer({'bootstrap.servers': '43.240.97.180:9092'})


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


count = 0
while True:
    # Trigger any available delivery report callbacks from previous produce() calls
    p.poll(0)
    count += 1

    # Asynchronously produce a message, the delivery report callback
    # will be triggered from poll() above, or flush() below, when the message has
    # been successfully delivered or failed permanently.
    try:
        p.produce('stream-sim', ("message " + str(count)).encode('utf-8'),
                  callback=delivery_report)
    except BufferError:
        continue
    """
    p.poll(0)
import time

from confluent_kafka import Producer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField

print("start 1p_multiples")

broker = 'kafka:9093'
topic = 'multiples'
conf = {'bootstrap.servers': broker}

p = Producer(**conf)
s = StringSerializer()
print("created KafkaPC")

ctx = SerializationContext(topic, MessageField.VALUE)

for i in range(10):
    # casts int to string for StringSerializer/StringDeserializer
    message = s(str(i*i), ctx)
    # DeprecationWarning will be resolved in upcoming release
    # https://github.com/confluentinc/confluent-kafka-python/issues/763
    p.produce(topic, message)
    print(f"Sent message {i*i}")
    time.sleep(1)
from confluent_kafka import Producer

p = Producer({
    'bootstrap.servers': '10.245.146.221:9092,10.245.146.231:9092,10.245.146.232:9092'
})


def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


some_data_source = [
    "111111111", "222222222", "3333333", "444444444", "555555555", "66666666"
]

while True:
    for data in some_data_source:
        p.poll(0)
        p.produce('test', data.encode('utf-8'), callback=delivery_report)
    p.flush()
    'broker.version.fallback': '0.10.0.0',
    'api.version.fallback.ms': 0,
    'sasl.mechanisms': 'PLAIN',
}

if config.KAFKA_KEY and config.KAFKA_SECRET:
    data = {
        'security.protocol': 'SASL_SSL',
        'sasl.username': config.KAFKA_KEY,
        'sasl.password': config.KAFKA_SECRET,
        **DATA
    }
else:
    data = DATA

p = Producer(data)

print("Publishing message...")

if len(sys.argv) < 2:
    print("missing number of values to send")
    sys.exit(1)


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
from confluent_kafka import Consumer, Producer
from kafka import KafkaProducer
import json
import time

BROKER_URL = "PLAINTEXT://localhost:9092"
TOPIC_NAME = "mitopico"

p = Producer({"bootstrap.servers": BROKER_URL})
#p2 = KafkaProducer(bootstrap_servers='localhost:9092')

file = '/home/workspace/data/uber.json'
with open(file) as f:
    for line in f:
        message = json.dumps(line).encode('utf-8')
        p.produce(TOPIC_NAME, message)
        time.sleep(1)
import io
import logging
import os
import time

from PIL import Image
from confluent_kafka import Consumer, Producer

producer = Producer({'bootstrap.servers': os.environ.get("KAFKA", "localhost:9092"),
                     "message.send.max.retries": 2})


def run_consumer(queue, msg_handler):
    consumer = Consumer({
        'bootstrap.servers': os.environ.get("KAFKA", "localhost:9092"),
        'group.id': 'manager',
        'auto.offset.reset': 'earliest'  # earliest _committed_ offset
    })
    _wait_for_topic_to_exist(consumer, queue)
    logging.info("Subscribing to topic: %s", queue)
    consumer.subscribe([queue])
    while True:
        logging.debug("Waiting for messages in %r...", queue)
        msg = consumer.poll()
        if msg is None:
            logging.warning("Poll timed out")
            break
def __init__(self, brokers: str, row_count: int, disable_progress_bar: bool):
    from confluent_kafka import Producer

    self.producer = Producer({"bootstrap.servers": brokers})
    super().__init__(brokers, row_count, disable_progress_bar)
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    p = Producer({"bootstrap.servers": BROKER_URL})
    while True:
        p.produce(topic_name, ClickEvent().serialize())
        await asyncio.sleep(1.0)
def __init__(self, broker, topic):
    self.broker = broker
    self.topic = topic
    self.bootstrap_servers = {'bootstrap.servers': self.broker}
    self.producer = Producer(self.bootstrap_servers)
def __init__(self, topic, rate, conf):
    """ SoakClient constructor. conf is the client configuration """
    self.topic = topic
    self.rate = rate
    self.disprate = int(rate * 10)
    self.run = True
    self.stats_cnt = {'producer': 0, 'consumer': 0}
    self.start_time = time.time()
    self.last_rusage = None
    self.last_rusage_time = None
    self.proc = psutil.Process(os.getpid())

    self.logger = logging.getLogger('soakclient')
    self.logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    self.logger.addHandler(handler)

    # Construct a unique id to use for metrics hostname so that
    # multiple instances of the SoakClient can run on the same machine.
    hostname = datadog.util.hostname.get_hostname()
    self.hostname = "py-{}-{}".format(hostname, self.topic)
    self.logger.info("SoakClient id {}".format(self.hostname))

    if 'group.id' not in conf:
        # Generate a group.id bound to this client and python version
        conf['group.id'] = 'soakclient-{}-{}-{}'.format(
            self.hostname, version()[0], sys.version.split(' ')[0])

    # Separate datadog config from client config
    datadog_conf = {k[len("datadog."):]: conf[k]
                    for k in conf.keys() if k.startswith("datadog.")}
    conf = {k: v for k, v in conf.items() if not k.startswith("datadog.")}

    # Set up datadog agent
    self.init_datadog(datadog_conf)

    def filter_config(conf, filter_out, strip_prefix):
        len_sp = len(strip_prefix)
        out = {}
        for k, v in conf.items():
            if len([x for x in filter_out if k.startswith(x)]) > 0:
                continue
            if k.startswith(strip_prefix):
                k = k[len_sp:]
            out[k] = v
        return out

    # Create topic (might already exist)
    aconf = filter_config(conf, ["consumer.", "producer."], "admin.")
    self.create_topic(self.topic, aconf)

    #
    # Create Producer and Consumer, each running in its own thread.
    #
    conf['stats_cb'] = self.stats_cb
    conf['statistics.interval.ms'] = 10000

    # Producer
    pconf = filter_config(conf, ["consumer.", "admin."], "producer.")
    pconf['error_cb'] = self.producer_error_cb
    self.producer = Producer(pconf)

    # Consumer
    cconf = filter_config(conf, ["producer.", "admin."], "consumer.")
    cconf['error_cb'] = self.consumer_error_cb
    cconf['on_commit'] = self.consumer_commit_cb
    self.logger.info("consumer: using group.id {}".format(cconf['group.id']))
    self.consumer = Consumer(cconf)

    # Create and start producer thread
    self.producer_thread = threading.Thread(target=self.producer_thread_main)
    self.producer_thread.start()

    # Create and start consumer thread
    self.consumer_thread = threading.Thread(target=self.consumer_thread_main)
    self.consumer_thread.start()
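# Illustrative only (not from the original): how the filter_config helper defined in
# the constructor above splits one flat soak-test config into per-client dicts by
# prefix. The helper is repeated here so the example runs on its own; the sample
# keys are made up.
def filter_config(conf, filter_out, strip_prefix):
    len_sp = len(strip_prefix)
    out = {}
    for k, v in conf.items():
        if len([x for x in filter_out if k.startswith(x)]) > 0:
            continue
        if k.startswith(strip_prefix):
            k = k[len_sp:]
        out[k] = v
    return out


example_conf = {
    'bootstrap.servers': 'localhost:9092',   # shared key, kept verbatim
    'producer.linger.ms': 50,                # producer-only key, prefix stripped
    'consumer.fetch.wait.max.ms': 100,       # consumer-only key, filtered out
    'admin.request.timeout.ms': 10000,       # admin-only key, filtered out
}
pconf = filter_config(example_conf, ["consumer.", "admin."], "producer.")
assert pconf == {'bootstrap.servers': 'localhost:9092', 'linger.ms': 50}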
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': '******',
    'sasl.password': '******',
    'group.id': str(uuid.uuid1()),  # this will create a new consumer group on each invocation.
    'auto.offset.reset': 'earliest'
})

producer = Producer({
    'bootstrap.servers': 'pkc-ep9mm.us-east-2.aws.confluent.cloud:9092',
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': '******',
    'sasl.password': '******'
})


#
# This function is run in its own thread to do all processing associated
# with preparing an upload of a new file, and does everything except upload
# the chunks and metadata themselves, which is handled by a queue.
#
def upload_file(file):
    global kcs, chunk_size, upload_queue
import uuid
import json

from confluent_kafka import Producer

p = Producer({'bootstrap.servers': 'localhost:9092'})

TOPIC = "job"


class ProducerError(Exception):
    pass


def produce(topic, key, value):
    p.produce(topic, key=key, value=json.dumps(value).encode())
    p.poll(0.5)


def main():
    message = {"method": "ingest_covid_data", "params": {}}
    produce(TOPIC, key=str(uuid.uuid4()).encode(), value=message)


main()
def Producer(self, options):
    self.logThis("Initialising a producer on server " + str(options.get("bootstrap.servers")))
    return Producer(options)
from argparse import ArgumentParser, FileType
from configparser import ConfigParser

from confluent_kafka import Producer

if __name__ == '__main__':
    # Parse the command line.
    parser = ArgumentParser()
    parser.add_argument('config_file', type=FileType('r'))
    args = parser.parse_args()

    # Parse the configuration.
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    config_parser = ConfigParser()
    config_parser.read_file(args.config_file)  # It's getting_started.ini
    config = dict(config_parser['default'])

    # Create Producer instance
    producer = Producer(config)

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    def delivery_callback(err, msg):
        if err:
            print('ERROR: Message failed delivery: {}'.format(err))
        else:
            print("Produced event to topic {topic}: key = {key:12} value = {value:12}".format(
                topic=msg.topic(), key=msg.key().decode('utf-8'), value=msg.value().decode('utf-8')))

    # Produce data by selecting random values from these lists.
    topic = "purchases"
    user_ids = ['eabara', 'jsmith', 'sgarcia', 'jbernard', 'htanaka', 'awalther']
    products = ['book', 'alarm clock', 't-shirts', 'gift card', 'batteries']
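# For context (an assumption; the real getting_started.ini is not shown in the source):
# the snippet expects a plain INI file whose [default] keys are librdkafka properties
# passed straight through to Producer(). A minimal, self-contained illustration of the
# parsing step above:
from configparser import ConfigParser

example_ini = "[default]\nbootstrap.servers=localhost:9092\n"
example_parser = ConfigParser()
example_parser.read_string(example_ini)
assert dict(example_parser['default']) == {'bootstrap.servers': 'localhost:9092'}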
import json
import random

from confluent_kafka import Producer

p = Producer({'bootstrap.servers': '34.238.53.42:9092'})


def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


path_data_pokemon = "data/pokedex.json"
data_s = open(path_data_pokemon, 'r')
data_j = json.load(data_s)
data_s.close()

topic = "pokedex"

try:
    while True:
        index = random.randint(0, len(data_j) - 1)
        pokemon = data_j[index]
        print(pokemon)
        p.produce(topic, str(pokemon), callback=delivery_report)
        p.poll(2.0)
except KeyboardInterrupt:
    pass
import time

from confluent_kafka import Producer


# Callback used to receive error messages emitted by the client instance
def error_cb(err):
    print('Error: %s' % err)


# Main program entry point
if __name__ == '__main__':
    # Step 1. Configure the connection to the Kafka cluster
    props = {
        # Where is the Kafka cluster?
        'bootstrap.servers': 'localhost:9092',  # <-- replace with the Kafka cluster to connect to
        'error_cb': error_cb  # callback that receives error messages
    }
    # Step 2. Create a Kafka Producer instance
    producer = Producer(**props)
    # Step 3. Specify the topic name to publish messages to
    topicName = 'ak03.fourpartition'
    msgCount = 10000
    try:
        print('Start sending messages ...')
        # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])
        for i in range(0, msgCount):
            producer.produce(topicName, key=str(i), value='msg_' + str(i))
            producer.poll(0)  # <-- (important) call poll so the client checks its internal buffer
            print('key={}, value={}'.format(str(i), 'msg_' + str(i)))
            time.sleep(3)  # pause the main thread for 3 seconds
        print('Send ' + str(msgCount) + ' messages to Kafka')
    except BufferError as e:
        # error handling
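# The snippet above is cut off inside its BufferError handler. A minimal sketch of a
# common recovery pattern (my assumption, not the original author's code): serve
# delivery callbacks with poll() so the local queue drains, then retry the send.
from confluent_kafka import Producer


def produce_with_retry(producer: Producer, topic: str, key: str, value: str) -> None:
    """Produce one record, draining the local queue and retrying once if it is full."""
    try:
        producer.produce(topic, key=key, value=value)
    except BufferError:
        producer.poll(1)  # let delivery callbacks run so queued messages free up space
        producer.produce(topic, key=key, value=value)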
k_admin_client = kafka_admin.AdminClient({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS})

kafka_consumer = Consumer({
    'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
    'group.id': f"kafka-eof_{str(uuid.uuid4())}",
    'auto.offset.reset': 'earliest',
    'enable.auto.commit': False,
    'enable.auto.offset.store': False,
    'enable.partition.eof': False
})
kafka_consumer.subscribe([KAFKA_TOPIC_IN_0, KAFKA_TOPIC_IN_1])
# kafka_consumer.assign([TopicPartition(KAFKA_TOPIC_IN_0), TopicPartition(KAFKA_TOPIC_IN_1)])

# create a Kafka producer
kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
                           "transactional.id": 'eos-transactions1.py'})


@pytest.mark.tryfirst()
def delivery_report(err, msg):
    """ Delivery callback for Kafka Produce. Called once for each message produced
        to indicate delivery result. Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        if VERBOSE:
            # get the sent message using msg.value()
            print(f"Message '{msg.key().decode('utf-8')}' \tdelivered to topic '{msg.topic()}' [{msg.partition()}].")


@pytest.mark.tryfirst()
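# Not part of the original test setup: because kafka_producer above is configured with
# "transactional.id", its sends only become visible to read_committed consumers once
# they are wrapped in a transaction. A minimal sketch of that flow:
def produce_in_transaction(producer, topic, value):
    """Send a single record inside a Kafka transaction (sketch)."""
    producer.init_transactions()   # one-time setup per producer instance
    producer.begin_transaction()
    producer.produce(topic, value=value)
    producer.commit_transaction()  # or abort_transaction() on failure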
from confluent_kafka import Producer
import socket

bootstrap_servers = "localhost:9092"
topic = "q-data"

conf = {
    "bootstrap.servers": bootstrap_servers,
    "client.id": socket.gethostname()
}

producer = Producer(conf)


def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: %s: %s" % (str(msg), str(err)))
    else:
        print("Message produced: %s" % (str(msg)))


producer.produce(topic, key="42", value="73", callback=acked)

# Wait up to 1 second for events. Callbacks will be invoked during
# this method call if the message is acknowledged.
producer.poll(1)
from confluent_kafka import Producer
import time

p = Producer({
    'bootstrap.servers': 'localhost:9092',
    "queue.buffering.max.ms": 1
})
# queue.buffering.max.ms": 2


def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message offset: {} delivered to {} [{}]'.format(
            msg.offset(), msg.topic(), msg.partition()))


m_count = 0
called = True
while m_count <= 10000:
    #p.poll(0.01)
    m_count += 1
    #if m_count%30 == 0:
'''
Because the KPI file is big, we emulate by reading chunk, using iterator and chunksize
'''
INPUT_DATA_FILE = args.input_file
chunksize = int(args.chunksize)
sleeptime = int(args.sleeptime)
KAFKA_TOPIC = args.topic

'''
the time record is "TIME"
we read data by chunk so we can handle a big sample data file
'''
input_data = pd.read_csv(INPUT_DATA_FILE,
                         parse_dates=['TIME'],
                         iterator=True,
                         chunksize=chunksize)

kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVER})

for chunk_data in input_data:
    '''
    now process each chunk
    '''
    chunk = chunk_data.dropna()
    for index, row in chunk.iterrows():
        '''
        Assume that when some data is available, we send it to Kafka in JSON
        '''
        json_data = json.dumps(row.to_dict(), default=datetime_converter)
        # check if any event/error sent
        print(f'DEBUG: Send {json_data} to Kafka')
        kafka_producer.produce(KAFKA_TOPIC, json_data.encode('utf-8'),
                               callback=kafka_delivery_error)
""" Partially from https://github.com/confluentinc/confluent-kafka-python, code was/is under licence Apache 2.0. """ from confluent_kafka import Producer from read_from_BigQuery import get_data_from_BigQuery_continuous_stateful # TODO move to env files p = Producer({ 'bootstrap.servers': 'ec2-52-11-165-61.us-west-2.compute.amazonaws.com,\ ec2-52-10-3-49.us-west-2.compute.amazonaws.com,\ ec2-34-218-39-83.us-west-2.compute.amazonaws.com' }) def delivery_report(err, msg): """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """ if err is not None: print('Message delivery failed: {}'.format(err)) else: print('Message delivered to topic {} partition [{}]'.format( msg.topic(), msg.partition())) for data in get_data_from_BigQuery_continuous_stateful(100): # Trigger any available delivery report callbacks from previous produce() calls p.poll(0) # Asynchronously produce a message, the delivery report callback # will be triggered from poll() above, or flush() below, when the message has